diff options
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/bvh')
45 files changed, 0 insertions, 14202 deletions
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp deleted file mode 100644 index bd102bd6ef..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "bvh_statistics.h" - -namespace embree -{ - template<int N> - BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene) - : AccelData((N==4) ? AccelData::TY_BVH4 : (N==8) ? AccelData::TY_BVH8 : AccelData::TY_UNKNOWN), - primTy(&primTy), device(scene->device), scene(scene), - root(emptyNode), alloc(scene->device,scene->isStaticAccel()), numPrimitives(0), numVertices(0) - { - } - - template<int N> - BVHN<N>::~BVHN () - { - for (size_t i=0; i<objects.size(); i++) - delete objects[i]; - } - - template<int N> - void BVHN<N>::clear() - { - set(BVHN::emptyNode,empty,0); - alloc.clear(); - } - - template<int N> - void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives) - { - this->root = root; - this->bounds = bounds; - this->numPrimitives = numPrimitives; - } - - template<int N> - void BVHN<N>::clearBarrier(NodeRef& node) - { - if (node.isBarrier()) - node.clearBarrier(); - else if (!node.isLeaf()) { - BaseNode* n = node.baseNode(); // FIXME: flags should be stored in BVH - for (size_t c=0; c<N; c++) - clearBarrier(n->child(c)); - } - } - - template<int N> - void BVHN<N>::layoutLargeNodes(size_t num) - { -#if defined(__X86_64__) || defined(__aarch64__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues - struct NodeArea - { - __forceinline NodeArea() {} - - __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds) - : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {} - - __forceinline bool operator< (const NodeArea& other) const { - return this->A < other.A; - } - - NodeRef* node; - float A; - }; - std::vector<NodeArea> lst; - lst.reserve(num); - lst.push_back(NodeArea(root,empty)); - - while (lst.size() < num) - { - std::pop_heap(lst.begin(), lst.end()); - NodeArea n = lst.back(); lst.pop_back(); - if (!n.node->isAABBNode()) break; - AABBNode* node = n.node->getAABBNode(); - for (size_t i=0; i<N; i++) { - if (node->child(i) == BVHN::emptyNode) continue; - lst.push_back(NodeArea(node->child(i),node->bounds(i))); - std::push_heap(lst.begin(), lst.end()); - } - } - - for (size_t i=0; i<lst.size(); i++) - lst[i].node->setBarrier(); - - root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator()); -#endif - } - - template<int N> - typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator) - { - if (node.isBarrier()) { - node.clearBarrier(); - return node; - } - else if (node.isAABBNode()) - { - AABBNode* oldnode = node.getAABBNode(); - AABBNode* newnode = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment); - *newnode = *oldnode; - for (size_t c=0; c<N; c++) - newnode->child(c) = layoutLargeNodesRecursion(oldnode->child(c),allocator); - return encodeNode(newnode); - } - else return node; - } - - template<int N> - double BVHN<N>::preBuild(const std::string& builderName) - { - if (builderName == "") - return inf; - - if (device->verbosity(2)) - { - Lock<MutexSys> lock(g_printMutex); - std::cout << "building BVH" << N << (builderName.find("MBlur") != std::string::npos ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush; - } - - double t0 = 0.0; - if (device->benchmark || device->verbosity(2)) t0 = getSeconds(); - return t0; - } - - template<int N> - void BVHN<N>::postBuild(double t0) - { - if (t0 == double(inf)) - return; - - double dt = 0.0; - if (device->benchmark || device->verbosity(2)) - dt = getSeconds()-t0; - - std::unique_ptr<BVHNStatistics<N>> stat; - - /* print statistics */ - if (device->verbosity(2)) - { - if (!stat) stat.reset(new BVHNStatistics<N>(this)); - const size_t usedBytes = alloc.getUsedBytes(); - Lock<MutexSys> lock(g_printMutex); - std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl; - - if (device->verbosity(2)) - std::cout << stat->str(); - - if (device->verbosity(2)) - { - FastAllocator::AllStatistics stat(&alloc); - for (size_t i=0; i<objects.size(); i++) - if (objects[i]) - stat = stat + FastAllocator::AllStatistics(&objects[i]->alloc); - - stat.print(numPrimitives); - } - - if (device->verbosity(3)) - { - alloc.print_blocks(); - for (size_t i=0; i<objects.size(); i++) - if (objects[i]) - objects[i]->alloc.print_blocks(); - } - - std::cout << std::flush; - } - - /* benchmark mode */ - if (device->benchmark) - { - if (!stat) stat.reset(new BVHNStatistics<N>(this)); - Lock<MutexSys> lock(g_printMutex); - std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush; - } - } - -#if defined(__AVX__) - template class BVHN<8>; -#endif - -#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__) - template class BVHN<4>; -#endif -} - diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.h b/thirdparty/embree-aarch64/kernels/bvh/bvh.h deleted file mode 100644 index 8fdf912e52..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh.h +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -/* include all node types */ -#include "bvh_node_aabb.h" -#include "bvh_node_aabb_mb.h" -#include "bvh_node_aabb_mb4d.h" -#include "bvh_node_obb.h" -#include "bvh_node_obb_mb.h" -#include "bvh_node_qaabb.h" - -namespace embree -{ - /*! flags used to enable specific node types in intersectors */ - enum BVHNodeFlags - { - BVH_FLAG_ALIGNED_NODE = 0x00001, - BVH_FLAG_ALIGNED_NODE_MB = 0x00010, - BVH_FLAG_UNALIGNED_NODE = 0x00100, - BVH_FLAG_UNALIGNED_NODE_MB = 0x01000, - BVH_FLAG_QUANTIZED_NODE = 0x100000, - BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000, - - /* short versions */ - BVH_AN1 = BVH_FLAG_ALIGNED_NODE, - BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB, - BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D, - BVH_UN1 = BVH_FLAG_UNALIGNED_NODE, - BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB, - BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D, - BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE, - BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB, - BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB, - BVH_QN1 = BVH_FLAG_QUANTIZED_NODE - }; - - /*! Multi BVH with N children. Each node stores the bounding box of - * it's N children as well as N child references. */ - template<int N> - class BVHN : public AccelData - { - ALIGNED_CLASS_(16); - public: - - /*! forward declaration of node ref type */ - typedef NodeRefPtr<N> NodeRef; - typedef BaseNode_t<NodeRef,N> BaseNode; - typedef AABBNode_t<NodeRef,N> AABBNode; - typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB; - typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D; - typedef OBBNode_t<NodeRef,N> OBBNode; - typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB; - typedef QuantizedBaseNode_t<N> QuantizedBaseNode; - typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB; - typedef QuantizedNode_t<NodeRef,N> QuantizedNode; - - /*! Number of bytes the nodes and primitives are minimally aligned to.*/ - static const size_t byteAlignment = 16; - static const size_t byteNodeAlignment = 4*N; - - /*! Empty node */ - static const size_t emptyNode = NodeRef::emptyNode; - - /*! Invalid node, used as marker in traversal */ - static const size_t invalidNode = NodeRef::invalidNode; - static const size_t popRay = NodeRef::popRay; - - /*! Maximum depth of the BVH. */ - static const size_t maxBuildDepth = 32; - static const size_t maxBuildDepthLeaf = maxBuildDepth+8; - static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder - - /*! Maximum number of primitive blocks in a leaf. */ - static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks; - - public: - - /*! Builder interface to create allocator */ - struct CreateAlloc : public FastAllocator::Create { - __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {} - }; - - typedef BVHNodeRecord<NodeRef> NodeRecord; - typedef BVHNodeRecordMB<NodeRef> NodeRecordMB; - typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; - - public: - - /*! BVHN default constructor. */ - BVHN (const PrimitiveType& primTy, Scene* scene); - - /*! BVHN destruction */ - ~BVHN (); - - /*! clears the acceleration structure */ - void clear(); - - /*! sets BVH members after build */ - void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives); - - /*! Clears the barrier bits of a subtree. */ - void clearBarrier(NodeRef& node); - - /*! lays out num large nodes of the BVH */ - void layoutLargeNodes(size_t num); - NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator); - - /*! called by all builders before build starts */ - double preBuild(const std::string& builderName); - - /*! called by all builders after build ended */ - void postBuild(double t0); - - /*! allocator class */ - struct Allocator { - BVHN* bvh; - Allocator (BVHN* bvh) : bvh(bvh) {} - __forceinline void* operator() (size_t bytes) const { - return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes); - } - }; - - /*! post build cleanup */ - void cleanup() { - alloc.cleanup(); - } - - public: - - /*! Encodes a node */ - static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); } - static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); } - static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); } - static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); } - static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); } - static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); } - static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); } - - public: - - /*! Prefetches the node this reference points to */ - __forceinline static void prefetch(const NodeRef ref, int types=0) - { -#if defined(__AVX512PF__) // MIC - if (types != BVH_FLAG_QUANTIZED_NODE) { - prefetchL2(((char*)ref.ptr)+0*64); - prefetchL2(((char*)ref.ptr)+1*64); - if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) { - prefetchL2(((char*)ref.ptr)+2*64); - prefetchL2(((char*)ref.ptr)+3*64); - } - if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) { - /* KNL still needs L2 prefetches for large nodes */ - prefetchL2(((char*)ref.ptr)+4*64); - prefetchL2(((char*)ref.ptr)+5*64); - prefetchL2(((char*)ref.ptr)+6*64); - prefetchL2(((char*)ref.ptr)+7*64); - } - } - else - { - /* todo: reduce if 32bit offsets are enabled */ - prefetchL2(((char*)ref.ptr)+0*64); - prefetchL2(((char*)ref.ptr)+1*64); - prefetchL2(((char*)ref.ptr)+2*64); - } -#else - if (types != BVH_FLAG_QUANTIZED_NODE) { - prefetchL1(((char*)ref.ptr)+0*64); - prefetchL1(((char*)ref.ptr)+1*64); - if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) { - prefetchL1(((char*)ref.ptr)+2*64); - prefetchL1(((char*)ref.ptr)+3*64); - } - if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) { - /* deactivate for large nodes on Xeon, as it introduces regressions */ - //prefetchL1(((char*)ref.ptr)+4*64); - //prefetchL1(((char*)ref.ptr)+5*64); - //prefetchL1(((char*)ref.ptr)+6*64); - //prefetchL1(((char*)ref.ptr)+7*64); - } - } - else - { - /* todo: reduce if 32bit offsets are enabled */ - prefetchL1(((char*)ref.ptr)+0*64); - prefetchL1(((char*)ref.ptr)+1*64); - prefetchL1(((char*)ref.ptr)+2*64); - } -#endif - } - - __forceinline static void prefetchW(const NodeRef ref, int types=0) - { - embree::prefetchEX(((char*)ref.ptr)+0*64); - embree::prefetchEX(((char*)ref.ptr)+1*64); - if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) { - embree::prefetchEX(((char*)ref.ptr)+2*64); - embree::prefetchEX(((char*)ref.ptr)+3*64); - } - if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) { - embree::prefetchEX(((char*)ref.ptr)+4*64); - embree::prefetchEX(((char*)ref.ptr)+5*64); - embree::prefetchEX(((char*)ref.ptr)+6*64); - embree::prefetchEX(((char*)ref.ptr)+7*64); - } - } - - /*! bvh type information */ - public: - const PrimitiveType* primTy; //!< primitive type stored in the BVH - - /*! bvh data */ - public: - Device* device; //!< device pointer - Scene* scene; //!< scene pointer - NodeRef root; //!< root node - FastAllocator alloc; //!< allocator used to allocate nodes - - /*! statistics data */ - public: - size_t numPrimitives; //!< number of primitives the BVH is build over - size_t numVertices; //!< number of vertices the BVH references - - /*! data arrays for special builders */ - public: - std::vector<BVHN*> objects; - vector_t<char,aligned_allocator<char,32>> subdiv_patches; - }; - - typedef BVHN<4> BVH4; - typedef BVHN<8> BVH8; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp deleted file mode 100644 index 23f4f63d45..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp +++ /dev/null @@ -1,1325 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh4_factory.h" -#include "../bvh/bvh.h" - -#include "../geometry/curveNv.h" -#include "../geometry/curveNi.h" -#include "../geometry/curveNi_mb.h" -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglev_mb.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/subdivpatch1.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" -#include "../geometry/subgrid.h" -#include "../common/accelinstance.h" - -namespace embree -{ - DECLARE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom); - - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4i,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8i,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4v,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4iMB,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4IntersectorStreamPacketFallback); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoellerNoFilter); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4vIntersectorStreamPluecker); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoellerNoFilter); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamPluecker); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream); - - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - - DECLARE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - - BVH4Factory::BVH4Factory(int bfeatures, int ifeatures) - { - SELECT_SYMBOL_DEFAULT_AVX_AVX2(ifeatures,BVH4ColliderUserGeom); - - selectBuilders(bfeatures); - selectIntersectors(ifeatures); - } - - void BVH4Factory::selectBuilders(int features) - { - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4MeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4iMeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4vMeshSAH)); - IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelQuadMeshSAH)); - IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelVirtualSAH)); - IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelInstanceSAH)); - - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4OBBCurve4iMBBuilder_OBB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4Curve8iBuilder_OBB_New)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4OBBCurve8iMBBuilder_OBB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4SceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4vSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4iSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedTriangle4iSceneBuilderSAH)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4vSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4iSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4iMBSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedQuad4iSceneBuilderSAH)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4SceneBuilderFastSpatialSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vSceneBuilderFastSpatialSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iSceneBuilderFastSpatialSAH)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4vSceneBuilderFastSpatialSAH)); - - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4VirtualSceneBuilderSAH)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4VirtualMBSceneBuilderSAH)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4InstanceSceneBuilderSAH)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1BuilderSAH)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1MBBuilderSAH)); - } - - void BVH4Factory::selectIntersectors(int features) - { - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4i)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8i)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4iMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB)); - - /* select intersectors1 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1MB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1MB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4vIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Pluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Pluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Pluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Moeller)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Triangle4iIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Quad4iIntersector1Pluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector1)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector1)); - - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector1)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector1)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector1)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Moeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector1Moeller)) - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Pluecker)); - -#if defined (EMBREE_RAY_PACKETS) - - /* select intersectors4 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector4)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector4)); - - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector4Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector4Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector4Chunk)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridPluecker)); - - /* select intersectors8 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector8)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector8)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector8Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector8Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector8Chunk)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridPluecker)); - - /* select intersectors16 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1Intersector16)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1MBIntersector16)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersector16Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualMBIntersector16Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceMBIntersector16Chunk)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridMBIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridPluecker)); - - /* select stream intersectors */ - SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4IntersectorStreamPacketFallback); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersectorStreamPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamPluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersectorStream)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersectorStream)); - -#endif - } - - Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH4OBBVirtualCurveIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4Hybrid(); - intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8Hybrid(); - intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16Hybrid(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4Hybrid(); - intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8Hybrid(); - intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16Hybrid(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - default: assert(false); - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH4OBBVirtualCurveIntersector1MB(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4HybridMB(); - intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8HybridMB(); - intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16HybridMB(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1MB(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4HybridMB(); - intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8HybridMB(); - intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16HybridMB(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - default: assert(false); - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant) - { - assert(ivariant == IntersectVariant::FAST); - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4Intersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4_filter = BVH4Triangle4Intersector4HybridMoeller(); - intersectors.intersector4_nofilter = BVH4Triangle4Intersector4HybridMoellerNoFilter(); - intersectors.intersector8_filter = BVH4Triangle4Intersector8HybridMoeller(); - intersectors.intersector8_nofilter = BVH4Triangle4Intersector8HybridMoellerNoFilter(); - intersectors.intersector16_filter = BVH4Triangle4Intersector16HybridMoeller(); - intersectors.intersector16_nofilter = BVH4Triangle4Intersector16HybridMoellerNoFilter(); - intersectors.intersectorN_filter = BVH4Triangle4IntersectorStreamMoeller(); - intersectors.intersectorN_nofilter = BVH4Triangle4IntersectorStreamMoellerNoFilter(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - assert(ivariant == IntersectVariant::ROBUST); - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4vIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4vIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Triangle4vIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4Triangle4vIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4Triangle4vIntersectorStreamPluecker(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4iIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4iIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4Triangle4iIntersector8HybridMoeller(); - intersectors.intersector16 = BVH4Triangle4iIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4Triangle4iIntersectorStreamMoeller(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4iIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4iIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Triangle4iIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4Triangle4iIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4Triangle4iIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4vMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4vMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4iMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Triangle4iMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4vIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4_filter = BVH4Quad4vIntersector4HybridMoeller(); - intersectors.intersector4_nofilter = BVH4Quad4vIntersector4HybridMoellerNoFilter(); - intersectors.intersector8_filter = BVH4Quad4vIntersector8HybridMoeller(); - intersectors.intersector8_nofilter = BVH4Quad4vIntersector8HybridMoellerNoFilter(); - intersectors.intersector16_filter = BVH4Quad4vIntersector16HybridMoeller(); - intersectors.intersector16_nofilter = BVH4Quad4vIntersector16HybridMoellerNoFilter(); - intersectors.intersectorN_filter = BVH4Quad4vIntersectorStreamMoeller(); - intersectors.intersectorN_nofilter = BVH4Quad4vIntersectorStreamMoellerNoFilter(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4vIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Quad4vIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Quad4vIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4Quad4vIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4Quad4vIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4iIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Quad4iIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4Quad4iIntersector8HybridMoeller(); - intersectors.intersector16= BVH4Quad4iIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4Quad4iIntersectorStreamMoeller(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4iIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Quad4iIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Quad4iIntersector8HybridPluecker(); - intersectors.intersector16= BVH4Quad4iIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4Quad4iIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4iMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridMoeller(); - intersectors.intersector16= BVH4Quad4iMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4Quad4iMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridPluecker(); - intersectors.intersector16= BVH4Quad4iMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH4Factory::QBVH4Triangle4iIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = QBVH4Triangle4iIntersector1Pluecker(); - return intersectors; - } - - Accel::Intersectors BVH4Factory::QBVH4Quad4iIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = QBVH4Quad4iIntersector1Pluecker(); - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4UserGeometryIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4VirtualIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4VirtualIntersector4Chunk(); - intersectors.intersector8 = BVH4VirtualIntersector8Chunk(); - intersectors.intersector16 = BVH4VirtualIntersector16Chunk(); - intersectors.intersectorN = BVH4VirtualIntersectorStream(); -#endif - intersectors.collider = BVH4ColliderUserGeom(); - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4UserGeometryMBIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4VirtualMBIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4VirtualMBIntersector4Chunk(); - intersectors.intersector8 = BVH4VirtualMBIntersector8Chunk(); - intersectors.intersector16 = BVH4VirtualMBIntersector16Chunk(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4InstanceIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4InstanceIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4InstanceIntersector4Chunk(); - intersectors.intersector8 = BVH4InstanceIntersector8Chunk(); - intersectors.intersector16 = BVH4InstanceIntersector16Chunk(); - intersectors.intersectorN = BVH4InstanceIntersectorStream(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4InstanceMBIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4InstanceMBIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4InstanceMBIntersector4Chunk(); - intersectors.intersector8 = BVH4InstanceMBIntersector8Chunk(); - intersectors.intersector16 = BVH4InstanceMBIntersector16Chunk(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4SubdivPatch1Intersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4SubdivPatch1Intersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4SubdivPatch1Intersector4(); - intersectors.intersector8 = BVH4SubdivPatch1Intersector8(); - intersectors.intersector16 = BVH4SubdivPatch1Intersector16(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4SubdivPatch1MBIntersectors(BVH4* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4SubdivPatch1MBIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4SubdivPatch1MBIntersector4(); - intersectors.intersector8 = BVH4SubdivPatch1MBIntersector8(); - intersectors.intersector16 = BVH4SubdivPatch1MBIntersector16(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel* BVH4Factory::BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Curve4i::type,scene); - Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4i(),ivariant); - - Builder* builder = nullptr; - if (scene->device->hair_builder == "default" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0); - else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - -#if defined(EMBREE_TARGET_SIMD8) - Accel* BVH4Factory::BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Curve8i::type,scene); - Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8i(),ivariant); - - Builder* builder = nullptr; - if (scene->device->hair_builder == "default" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0); - else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8i>"); - - return new AccelInstance(accel,builder,intersectors); - } -#endif - - Accel* BVH4Factory::BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Curve4v::type,scene); - Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4v(),ivariant); - - Builder* builder = nullptr; - if (scene->device->hair_builder == "default" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0); - else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4v>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Curve4iMB::type,scene); - Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector4iMB(),ivariant); - - Builder* builder = nullptr; - if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0); - else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4iMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - -#if defined(EMBREE_TARGET_SIMD8) - Accel* BVH4Factory::BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Curve8iMB::type,scene); - Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(), ivariant); - - Builder* builder = nullptr; - if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0); - else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8iMB>"); - - return new AccelInstance(accel,builder,intersectors); - } -#endif - - Accel* BVH4Factory::BVH4Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Triangle4::type,scene); - - Accel::Intersectors intersectors; - if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4Intersectors(accel,ivariant); - else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4Intersectors(accel,IntersectVariant::FAST); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>"); - - Builder* builder = nullptr; - if (scene->device->tri_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY); - else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); - else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,true); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Triangle4v::type,scene); - - Accel::Intersectors intersectors; - if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4vIntersectors(accel,ivariant); - else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::FAST); - else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::ROBUST); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>"); - - Builder* builder = nullptr; - if (scene->device->tri_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY); - else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); - else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,true); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4v>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Triangle4i::type,scene); - - Accel::Intersectors intersectors; - if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4iIntersectors(accel,ivariant); - else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::FAST); - else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::ROBUST); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4i>"); - - Builder* builder = nullptr; - if (scene->device->tri_builder == "default" ) { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY); - else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); - else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,true); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Triangle4i::type,scene); - - Accel::Intersectors intersectors; - if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4iMBIntersectors(accel,ivariant); - else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::FAST); - else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::ROBUST); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4iMB>"); - - Builder* builder = nullptr; - if (scene->device->tri_builder_mb == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4iMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Triangle4vMB::type,scene); - - Accel::Intersectors intersectors; - if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4vMBIntersectors(accel,ivariant); - else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::FAST); - else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::ROBUST); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4vMB>"); - - Builder* builder = nullptr; - if (scene->device->tri_builder_mb == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4vMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Quad4v::type,scene); - Accel::Intersectors intersectors = BVH4Quad4vIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->quad_builder == "sah" ) builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0); - else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); - else if (scene->device->quad_builder == "dynamic" ) builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4v>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Quad4i::type,scene); - Accel::Intersectors intersectors = BVH4Quad4iIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement - } - } - else if (scene->device->quad_builder == "sah") builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(Quad4i::type,scene); - Accel::Intersectors intersectors = BVH4Quad4iMBIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder_mb == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->quad_builder_mb == "sah") builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH4<Quad4iMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4QuantizedQuad4i(Scene* scene) - { - BVH4* accel = new BVH4(Quad4i::type,scene); - Builder* builder = BVH4QuantizedQuad4iSceneBuilderSAH(accel,scene,0); - Accel::Intersectors intersectors = QBVH4Quad4iIntersectors(accel); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4QuantizedTriangle4i(Scene* scene) - { - BVH4* accel = new BVH4(Triangle4i::type,scene); - Builder* builder = BVH4QuantizedTriangle4iSceneBuilderSAH(accel,scene,0); - Accel::Intersectors intersectors = QBVH4Triangle4iIntersectors(accel); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4SubdivPatch1(Scene* scene) - { - BVH4* accel = new BVH4(SubdivPatch1::type,scene); - Accel::Intersectors intersectors = BVH4SubdivPatch1Intersectors(accel); - Builder* builder = BVH4SubdivPatch1BuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4SubdivPatch1MB(Scene* scene) - { - BVH4* accel = new BVH4(SubdivPatch1::type,scene); - Accel::Intersectors intersectors = BVH4SubdivPatch1MBIntersectors(accel); - Builder* builder = BVH4SubdivPatch1MBBuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4UserGeometry(Scene* scene, BuildVariant bvariant) - { - BVH4* accel = new BVH4(Object::type,scene); - Accel::Intersectors intersectors = BVH4UserGeometryIntersectors(accel); - - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4VirtualSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->object_builder == "sah") builder = BVH4VirtualSceneBuilderSAH(accel,scene,0); - else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4UserGeometryMB(Scene* scene) - { - BVH4* accel = new BVH4(Object::type,scene); - Accel::Intersectors intersectors = BVH4UserGeometryMBIntersectors(accel); - Builder* builder = BVH4VirtualMBSceneBuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant) - { - BVH4* accel = new BVH4(InstancePrimitive::type,scene); - Accel::Intersectors intersectors = BVH4InstanceIntersectors(accel); - auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP; - // Builder* builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype); - - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype); break; - case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break; - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->object_builder == "sah") builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype); - else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4InstanceMB(Scene* scene, bool isExpensive) - { - BVH4* accel = new BVH4(InstancePrimitive::type,scene); - Accel::Intersectors intersectors = BVH4InstanceMBIntersectors(accel); - auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP; - Builder* builder = BVH4InstanceMBSceneBuilderSAH(accel,scene,gtype); - return new AccelInstance(accel,builder,intersectors); - } - - Accel::Intersectors BVH4Factory::BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - if (ivariant == IntersectVariant::FAST) - { - intersectors.intersector1 = BVH4GridIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4GridIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4GridIntersector8HybridMoeller(); - intersectors.intersector16 = BVH4GridIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - } - else /* if (ivariant == IntersectVariant::ROBUST) */ - { - intersectors.intersector1 = BVH4GridIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4GridIntersector4HybridPluecker(); - intersectors.intersector8 = BVH4GridIntersector8HybridPluecker(); - intersectors.intersector16 = BVH4GridIntersector16HybridPluecker(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - } - return intersectors; - } - - Accel::Intersectors BVH4Factory::BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH4GridMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH4GridMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH4GridMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH4GridMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH4IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel* BVH4Factory::BVH4Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(SubGridQBVH4::type,scene); - Accel::Intersectors intersectors = BVH4GridIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - builder = BVH4GridSceneBuilderSAH(accel,scene,0); - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4<GridMesh>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH4Factory::BVH4GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH4* accel = new BVH4(SubGridQBVH4::type,scene); - Accel::Intersectors intersectors = BVH4GridMBIntersectors(accel,ivariant); - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - builder = BVH4GridMBSceneBuilderSAH(accel,scene,0); - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4MB<GridMesh>"); - return new AccelInstance(accel,builder,intersectors); - } - -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h deleted file mode 100644 index a68227b41f..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_factory.h" - -namespace embree -{ - /*! BVH4 instantiations */ - class BVH4Factory : public BVHFactory - { - public: - BVH4Factory(int bfeatures, int ifeatures); - - public: - Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant); - Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant); - Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant); - Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant); - Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB); - - Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST); - Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - Accel* BVH4QuantizedTriangle4i(Scene* scene); - Accel* BVH4QuantizedQuad4i(Scene* scene); - - Accel* BVH4SubdivPatch1(Scene* scene); - Accel* BVH4SubdivPatch1MB(Scene* scene); - - Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC); - Accel* BVH4UserGeometryMB(Scene* scene); - - Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC); - Accel* BVH4InstanceMB(Scene* scene, bool isExpensive); - - Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - private: - void selectBuilders(int features); - void selectIntersectors(int features); - - private: - Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant); - Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant); - - Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant); - - Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant); - - Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh); - Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh); - - Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh); - Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh); - - Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh); - Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh); - - Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh); - Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh); - - Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant); - - private: - - DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker); - - // ============== - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker); - - // ============== - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker); - - // ============== - - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4IntersectorStreamPacketFallback); - - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoellerNoFilter); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4vIntersectorStreamPluecker); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamPluecker); - - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoellerNoFilter); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamPluecker); - DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamPluecker); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream); - - // SAH scene builders - private: - DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - - DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - // spatial scene builder - private: - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - - // twolevel scene builders - private: - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool); - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp deleted file mode 100644 index 9fe057c392..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp +++ /dev/null @@ -1,1165 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "../common/isa.h" // to define EMBREE_TARGET_SIMD8 - -#if defined (EMBREE_TARGET_SIMD8) - -#include "bvh8_factory.h" -#include "../bvh/bvh.h" - -#include "../geometry/curveNv.h" -#include "../geometry/curveNi.h" -#include "../geometry/curveNi_mb.h" -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglev_mb.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/subdivpatch1.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" -#include "../geometry/subgrid.h" -#include "../common/accelinstance.h" - -namespace embree -{ - DECLARE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom); - - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void); - DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker); - DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1); - - DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller); - DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk); - - DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk); - - DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk); - - DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller); - DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker); - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream); - - DECLARE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream); - - DECLARE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool); - DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool); - - BVH8Factory::BVH8Factory(int bfeatures, int ifeatures) - { - SELECT_SYMBOL_INIT_AVX(ifeatures,BVH8ColliderUserGeom); - - selectBuilders(bfeatures); - selectIntersectors(ifeatures); - } - - void BVH8Factory::selectBuilders(int features) - { - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8Curve8vBuilder_OBB_New)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8OBBCurve8iMBBuilder_OBB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4iSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4SceneBuilderSAH)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iMBSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedQuad4iSceneBuilderSAH)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualSceneBuilderSAH)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualMBSceneBuilderSAH)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceSceneBuilderSAH)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceMBSceneBuilderSAH)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderFastSpatialSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderFastSpatialSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderFastSpatialSAH)); - - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4MeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4vMeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4iMeshSAH)); - IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelQuadMeshSAH)); - IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelVirtualSAH)); - IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelInstanceSAH)); - } - - void BVH8Factory::selectIntersectors(int features) - { - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB)); - - /* select intersectors1 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1MB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1MB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Pluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Woop)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Pluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Pluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Pluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4iIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4Intersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Quad4iIntersector1Pluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector1)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector1)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector1)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Moeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridMBIntersector1Moeller)) - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Pluecker)); - -#if defined (EMBREE_RAY_PACKETS) - - /* select intersectors4 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridPluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector4Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector4Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector4Chunk)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridPluecker)); - - /* select intersectors8 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridPluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector8Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector8Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector8Chunk)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridPluecker)); - - /* select intersectors16 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridPluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector16Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector16Chunk)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector16Chunk)); - - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridPluecker)); - - /* select stream intersectors */ - - SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8IntersectorStreamPacketFallback); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersectorStreamPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamPluecker)); - - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersectorStream)); - - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersectorStream)); - -#endif - } - - Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4Hybrid(); - intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8Hybrid(); - intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16Hybrid(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4Hybrid(); - intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8Hybrid(); - intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16Hybrid(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - default: assert(false); - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1MB(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4HybridMB(); - intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8HybridMB(); - intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16HybridMB(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.leafIntersector = leafIntersector; - intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1MB(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4HybridMB(); - intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8HybridMB(); - intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16HybridMB(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - default: assert(false); - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant) - { - assert(ivariant == IntersectVariant::FAST); - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4Intersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4_filter = BVH8Triangle4Intersector4HybridMoeller(); - intersectors.intersector4_nofilter = BVH8Triangle4Intersector4HybridMoellerNoFilter(); - intersectors.intersector8_filter = BVH8Triangle4Intersector8HybridMoeller(); - intersectors.intersector8_nofilter = BVH8Triangle4Intersector8HybridMoellerNoFilter(); - intersectors.intersector16_filter = BVH8Triangle4Intersector16HybridMoeller(); - intersectors.intersector16_nofilter = BVH8Triangle4Intersector16HybridMoellerNoFilter(); - intersectors.intersectorN_filter = BVH8Triangle4IntersectorStreamMoeller(); - intersectors.intersectorN_nofilter = BVH8Triangle4IntersectorStreamMoellerNoFilter(); -#endif - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; -#define ENABLE_WOOP_TEST 0 -#if ENABLE_WOOP_TEST == 0 - //assert(ivariant == IntersectVariant::ROBUST); - intersectors.intersector1 = BVH8Triangle4vIntersector1Pluecker(); -#else - intersectors.intersector1 = BVH8Triangle4vIntersector1Woop(); -#endif - -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4vIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Triangle4vIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Triangle4vIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8Triangle4vIntersectorStreamPluecker(); -#endif - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4iIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4iIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8Triangle4iIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8Triangle4iIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8Triangle4iIntersectorStreamMoeller(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4iIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4iIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Triangle4iIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Triangle4iIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8Triangle4iIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4vMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4vMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4iMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Triangle4iMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4vIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4_filter = BVH8Quad4vIntersector4HybridMoeller(); - intersectors.intersector4_nofilter = BVH8Quad4vIntersector4HybridMoellerNoFilter(); - intersectors.intersector8_filter = BVH8Quad4vIntersector8HybridMoeller(); - intersectors.intersector8_nofilter = BVH8Quad4vIntersector8HybridMoellerNoFilter(); - intersectors.intersector16_filter = BVH8Quad4vIntersector16HybridMoeller(); - intersectors.intersector16_nofilter = BVH8Quad4vIntersector16HybridMoellerNoFilter(); - intersectors.intersectorN_filter = BVH8Quad4vIntersectorStreamMoeller(); - intersectors.intersectorN_nofilter = BVH8Quad4vIntersectorStreamMoellerNoFilter(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4vIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Quad4vIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Quad4vIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Quad4vIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8Quad4vIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4iIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Quad4iIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8Quad4iIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8Quad4iIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8Quad4iIntersectorStreamMoeller(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4iIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Quad4iIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Quad4iIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Quad4iIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8Quad4iIntersectorStreamPluecker(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - switch (ivariant) { - case IntersectVariant::FAST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4iMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - case IntersectVariant::ROBUST: - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8Quad4iMBIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - } - return Accel::Intersectors(); - } - - Accel::Intersectors BVH8Factory::QBVH8Triangle4iIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = QBVH8Triangle4iIntersector1Pluecker(); - return intersectors; - } - - Accel::Intersectors BVH8Factory::QBVH8Triangle4Intersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = QBVH8Triangle4Intersector1Moeller(); - return intersectors; - } - - Accel::Intersectors BVH8Factory::QBVH8Quad4iIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = QBVH8Quad4iIntersector1Pluecker(); - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8UserGeometryIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8VirtualIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8VirtualIntersector4Chunk(); - intersectors.intersector8 = BVH8VirtualIntersector8Chunk(); - intersectors.intersector16 = BVH8VirtualIntersector16Chunk(); - intersectors.intersectorN = BVH8VirtualIntersectorStream(); -#endif - intersectors.collider = BVH8ColliderUserGeom(); - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8UserGeometryMBIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8VirtualMBIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8VirtualMBIntersector4Chunk(); - intersectors.intersector8 = BVH8VirtualMBIntersector8Chunk(); - intersectors.intersector16 = BVH8VirtualMBIntersector16Chunk(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8InstanceIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8InstanceIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8InstanceIntersector4Chunk(); - intersectors.intersector8 = BVH8InstanceIntersector8Chunk(); - intersectors.intersector16 = BVH8InstanceIntersector16Chunk(); - intersectors.intersectorN = BVH8InstanceIntersectorStream(); -#endif - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8InstanceMBIntersectors(BVH8* bvh) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8InstanceMBIntersector1(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8InstanceMBIntersector4Chunk(); - intersectors.intersector8 = BVH8InstanceMBIntersector8Chunk(); - intersectors.intersector16 = BVH8InstanceMBIntersector16Chunk(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - return intersectors; - } - - Accel* BVH8Factory::BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Curve8v::type,scene); - Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8v(),ivariant); - Builder* builder = BVH8Curve8vBuilder_OBB_New(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Curve8iMB::type,scene); - Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(),ivariant); - Builder* builder = BVH8OBBCurve8iMBBuilder_OBB(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Triangle4::type,scene); - Accel::Intersectors intersectors= BVH8Triangle4Intersectors(accel,ivariant); - Builder* builder = nullptr; - if (scene->device->tri_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->tri_builder == "sah" ) builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); - else if (scene->device->tri_builder == "sah_presplit") builder = BVH8Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY); - else if (scene->device->tri_builder == "dynamic" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); - else if (scene->device->tri_builder == "morton" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,true); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Triangle4v::type,scene); - Accel::Intersectors intersectors= BVH8Triangle4vIntersectors(accel,ivariant); - Builder* builder = nullptr; - if (scene->device->tri_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Triangle4vSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4v>"); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Triangle4i::type,scene); - Accel::Intersectors intersectors = BVH8Triangle4iIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->tri_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Triangle4iSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement - } - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Triangle4i::type,scene); - Accel::Intersectors intersectors = BVH8Triangle4iMBIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->tri_builder_mb == "default") { // FIXME: implement - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4iMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Triangle4vMB::type,scene); - Accel::Intersectors intersectors= BVH8Triangle4vMBIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->tri_builder_mb == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4vMB>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8QuantizedTriangle4i(Scene* scene) - { - BVH8* accel = new BVH8(Triangle4i::type,scene); - Accel::Intersectors intersectors = QBVH8Triangle4iIntersectors(accel); - Builder* builder = BVH8QuantizedTriangle4iSceneBuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8QuantizedTriangle4(Scene* scene) - { - BVH8* accel = new BVH8(Triangle4::type,scene); - Accel::Intersectors intersectors = QBVH8Triangle4Intersectors(accel); - Builder* builder = BVH8QuantizedTriangle4SceneBuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Quad4v::type,scene); - Accel::Intersectors intersectors = BVH8Quad4vIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Quad4vSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break; - } - } - else if (scene->device->quad_builder == "dynamic" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false); - else if (scene->device->quad_builder == "morton" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,true); - else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4v>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Quad4i::type,scene); - Accel::Intersectors intersectors = BVH8Quad4iIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Quad4iSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement - } - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(Quad4i::type,scene); - Accel::Intersectors intersectors = BVH8Quad4iMBIntersectors(accel,ivariant); - - Builder* builder = nullptr; - if (scene->device->quad_builder_mb == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8Quad4iMBSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH8<Quad4i>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8QuantizedQuad4i(Scene* scene) - { - BVH8* accel = new BVH8(Quad4i::type,scene); - Accel::Intersectors intersectors = QBVH8Quad4iIntersectors(accel); - Builder* builder = nullptr; - if (scene->device->quad_builder == "default" ) builder = BVH8QuantizedQuad4iSceneBuilderSAH(accel,scene,0); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for QBVH8<Quad4i>"); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8UserGeometry(Scene* scene, BuildVariant bvariant) - { - BVH8* accel = new BVH8(Object::type,scene); - Accel::Intersectors intersectors = BVH8UserGeometryIntersectors(accel); - - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8VirtualSceneBuilderSAH(accel,scene,0); break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false); break; - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->object_builder == "sah") builder = BVH8VirtualSceneBuilderSAH(accel,scene,0); - else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8UserGeometryMB(Scene* scene) - { - BVH8* accel = new BVH8(Object::type,scene); - Accel::Intersectors intersectors = BVH8UserGeometryMBIntersectors(accel); - Builder* builder = BVH8VirtualMBSceneBuilderSAH(accel,scene,0); - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant) - { - BVH8* accel = new BVH8(InstancePrimitive::type,scene); - Accel::Intersectors intersectors = BVH8InstanceIntersectors(accel); - auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE; - // Builder* builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype); - - Builder* builder = nullptr; - if (scene->device->object_builder == "default") { - switch (bvariant) { - case BuildVariant::STATIC : builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);; break; - case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break; - case BuildVariant::HIGH_QUALITY: assert(false); break; - } - } - else if (scene->device->object_builder == "sah") builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype); - else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8InstanceMB(Scene* scene, bool isExpensive) - { - BVH8* accel = new BVH8(InstancePrimitive::type,scene); - Accel::Intersectors intersectors = BVH8InstanceMBIntersectors(accel); - auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE; - Builder* builder = BVH8InstanceMBSceneBuilderSAH(accel,scene,gtype); - return new AccelInstance(accel,builder,intersectors); - } - - Accel::Intersectors BVH8Factory::BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - if (ivariant == IntersectVariant::FAST) - { - intersectors.intersector1 = BVH8GridIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8GridIntersector4HybridMoeller(); - intersectors.intersector8 = BVH8GridIntersector8HybridMoeller(); - intersectors.intersector16 = BVH8GridIntersector16HybridMoeller(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - } - else /* if (ivariant == IntersectVariant::ROBUST) */ - { - intersectors.intersector1 = BVH8GridIntersector1Pluecker(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = BVH8GridIntersector4HybridPluecker(); - intersectors.intersector8 = BVH8GridIntersector8HybridPluecker(); - intersectors.intersector16 = BVH8GridIntersector16HybridPluecker(); - intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); -#endif - } - return intersectors; - } - - Accel::Intersectors BVH8Factory::BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant) - { - Accel::Intersectors intersectors; - intersectors.ptr = bvh; - intersectors.intersector1 = BVH8GridMBIntersector1Moeller(); -#if defined (EMBREE_RAY_PACKETS) - intersectors.intersector4 = nullptr; - intersectors.intersector8 = nullptr; - intersectors.intersector16 = nullptr; - intersectors.intersectorN = nullptr; -#endif - return intersectors; - } - - Accel* BVH8Factory::BVH8Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(SubGridQBVH8::type,scene); - Accel::Intersectors intersectors = BVH8GridIntersectors(accel,ivariant); - Builder* builder = nullptr; - if (scene->device->grid_builder == "default") { - builder = BVH8GridSceneBuilderSAH(accel,scene,0); - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<GridMesh>"); - - return new AccelInstance(accel,builder,intersectors); - } - - Accel* BVH8Factory::BVH8GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant) - { - BVH8* accel = new BVH8(SubGridQBVH8::type,scene); - Accel::Intersectors intersectors = BVH8GridMBIntersectors(accel,ivariant); - Builder* builder = nullptr; - if (scene->device->grid_builder_mb == "default") { - builder = BVH8GridMBSceneBuilderSAH(accel,scene,0); - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8MB<GridMesh>"); - return new AccelInstance(accel,builder,intersectors); - } -} - -#endif diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h deleted file mode 100644 index b92188e7d3..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_factory.h" - -namespace embree -{ - /*! BVH8 instantiations */ - class BVH8Factory : public BVHFactory - { - public: - BVH8Factory(int bfeatures, int ifeatures); - - public: - Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant); - Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v); - DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB); - - Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - Accel* BVH8QuantizedTriangle4i(Scene* scene); - Accel* BVH8QuantizedTriangle4(Scene* scene); - Accel* BVH8QuantizedQuad4i(Scene* scene); - - Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC); - Accel* BVH8UserGeometryMB(Scene* scene); - - Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC); - Accel* BVH8InstanceMB(Scene* scene, bool isExpensive); - - Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST); - - private: - void selectBuilders(int features); - void selectIntersectors(int features); - - private: - Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant); - Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant); - - Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant); - - Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant); - - Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh); - Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh); - Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh); - - Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh); - Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh); - - Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh); - Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh); - - Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant); - Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant); - - private: - DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker); - DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1); - - DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller); - DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk); - - DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk); - - DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk); - - DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller); - DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker); - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream); - - DEFINE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream); - - // SAH scene builders - private: - DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); - - DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t); - - // SAH spatial scene builders - private: - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t); - - // twolevel scene builders - private: - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool); - DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool); - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp deleted file mode 100644 index e832537ec5..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_builder.h" - -namespace embree -{ - namespace isa - { - template<int N> - typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) - { - auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef { - return createLeaf(prims,set,alloc); - }; - - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - return BVHBuilderBinnedSAH::build<NodeRef> - (FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),createLeafFunc,progressFunc,prims,pinfo,settings); - } - - - template<int N> - typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) - { - auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef { - return createLeaf(prims,set,alloc); - }; - - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - return BVHBuilderBinnedSAH::build<NodeRef> - (FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),createLeafFunc,progressFunc,prims,pinfo,settings); - } - - template<int N> - typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) - { - auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRecordMB { - return createLeaf(prims,set,alloc); - }; - - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - return BVHBuilderBinnedSAH::build<NodeRecordMB> - (FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),createLeafFunc,progressFunc,prims,pinfo,settings); - } - - template struct BVHNBuilderVirtual<4>; - template struct BVHNBuilderQuantizedVirtual<4>; - template struct BVHNBuilderMblurVirtual<4>; - -#if defined(__AVX__) - template struct BVHNBuilderVirtual<8>; - template struct BVHNBuilderQuantizedVirtual<8>; - template struct BVHNBuilderMblurVirtual<8>; -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h deleted file mode 100644 index 1b86bb45ad..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "../builders/bvh_builder_sah.h" - -namespace embree -{ - namespace isa - { - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - template<int N> - struct BVHNBuilderVirtual - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef FastAllocator::CachedAllocator Allocator; - - struct BVHNBuilderV { - NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings); - virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0; - }; - - template<typename CreateLeafFunc> - struct BVHNBuilderT : public BVHNBuilderV - { - BVHNBuilderT (CreateLeafFunc createLeafFunc) - : createLeafFunc(createLeafFunc) {} - - NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) { - return createLeafFunc(prims,set,alloc); - } - - private: - CreateLeafFunc createLeafFunc; - }; - - template<typename CreateLeafFunc> - static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) { - return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings); - } - }; - - template<int N> - struct BVHNBuilderQuantizedVirtual - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef FastAllocator::CachedAllocator Allocator; - - struct BVHNBuilderV { - NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings); - virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0; - }; - - template<typename CreateLeafFunc> - struct BVHNBuilderT : public BVHNBuilderV - { - BVHNBuilderT (CreateLeafFunc createLeafFunc) - : createLeafFunc(createLeafFunc) {} - - NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) { - return createLeafFunc(prims,set,alloc); - } - - private: - CreateLeafFunc createLeafFunc; - }; - - template<typename CreateLeafFunc> - static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) { - return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings); - } - }; - - template<int N> - struct BVHNBuilderMblurVirtual - { - typedef BVHN<N> BVH; - typedef typename BVH::AABBNodeMB AABBNodeMB; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecordMB NodeRecordMB; - typedef FastAllocator::CachedAllocator Allocator; - - struct BVHNBuilderV { - NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange); - virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0; - }; - - template<typename CreateLeafFunc> - struct BVHNBuilderT : public BVHNBuilderV - { - BVHNBuilderT (CreateLeafFunc createLeafFunc) - : createLeafFunc(createLeafFunc) {} - - NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) { - return createLeafFunc(prims,set,alloc); - } - - private: - CreateLeafFunc createLeafFunc; - }; - - template<typename CreateLeafFunc> - static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) { - return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp deleted file mode 100644 index 64759c1294..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "bvh_statistics.h" -#include "bvh_rotate.h" -#include "../common/profile.h" -#include "../../common/algorithms/parallel_prefix_sum.h" - -#include "../builders/primrefgen.h" -#include "../builders/bvh_builder_morton.h" - -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" - -#if defined(__X86_64__) || defined(__aarch64__) -# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform -#else -# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues -#endif - -namespace embree -{ - namespace isa - { - template<int N> - struct SetBVHNBounds - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - typedef typename BVH::AABBNode AABBNode; - - BVH* bvh; - __forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {} - - __forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num) - { - AABBNode* node = ref.getAABBNode(); - - BBox3fa res = empty; - for (size_t i=0; i<num; i++) { - const BBox3fa b = children[i].bounds; - res.extend(b); - node->setRef(i,children[i].ref); - node->setBounds(i,b); - } - - BBox3fx result = (BBox3fx&)res; -#if ROTATE_TREE - if (N == 4) - { - size_t n = 0; - for (size_t i=0; i<num; i++) - n += children[i].bounds.lower.a; - - if (n >= 4096) { - for (size_t i=0; i<num; i++) { - if (children[i].bounds.lower.a < 4096) { - for (int j=0; j<ROTATE_TREE; j++) - BVHNRotate<N>::rotate(node->child(i)); - node->child(i).setBarrier(); - } - } - } - result.lower.a = unsigned(n); - } -#endif - - return NodeRecord(ref,result); - } - }; - - template<int N, typename Primitive> - struct CreateMortonLeaf; - - template<int N> - struct CreateMortonLeaf<N,Triangle4> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - assert(items<=4); - - /* allocate leaf node */ - Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,1); - vuint4 vgeomID = -1, vprimID = -1; - Vec3vf4 v0 = zero, v1 = zero, v2 = zero; - const TriangleMesh* __restrict__ const mesh = this->mesh; - - for (size_t i=0; i<items; i++) - { - const unsigned int primID = morton[start+i].index; - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - vgeomID [i] = geomID_; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - - Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID)); - BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper); -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = unsigned(current.size()); -#endif - return NodeRecord(ref,box_o); - } - - private: - TriangleMesh* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<int N> - struct CreateMortonLeaf<N,Triangle4v> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - assert(items<=4); - - /* allocate leaf node */ - Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,1); - vuint4 vgeomID = -1, vprimID = -1; - Vec3vf4 v0 = zero, v1 = zero, v2 = zero; - const TriangleMesh* __restrict__ mesh = this->mesh; - - for (size_t i=0; i<items; i++) - { - const unsigned int primID = morton[start+i].index; - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - vgeomID [i] = geomID_; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID)); - BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper); -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = current.size(); -#endif - return NodeRecord(ref,box_o); - } - private: - TriangleMesh* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<int N> - struct CreateMortonLeaf<N,Triangle4i> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - assert(items<=4); - - /* allocate leaf node */ - Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,1); - - vuint4 v0 = zero, v1 = zero, v2 = zero; - vuint4 vgeomID = -1, vprimID = -1; - const TriangleMesh* __restrict__ const mesh = this->mesh; - - for (size_t i=0; i<items; i++) - { - const unsigned int primID = morton[start+i].index; - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2); - vgeomID[i] = geomID_; - vprimID[i] = primID; - unsigned int int_stride = mesh->vertices0.getStride()/4; - v0[i] = tri.v[0] * int_stride; - v1[i] = tri.v[1] * int_stride; - v2[i] = tri.v[2] * int_stride; - } - - for (size_t i=items; i<4; i++) - { - vgeomID[i] = vgeomID[0]; - vprimID[i] = -1; - v0[i] = 0; - v1[i] = 0; - v2[i] = 0; - } - Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID)); - BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper); -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = current.size(); -#endif - return NodeRecord(ref,box_o); - } - private: - TriangleMesh* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<int N> - struct CreateMortonLeaf<N,Quad4v> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - assert(items<=4); - - /* allocate leaf node */ - Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,1); - - vuint4 vgeomID = -1, vprimID = -1; - Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero; - const QuadMesh* __restrict__ mesh = this->mesh; - - for (size_t i=0; i<items; i++) - { - const unsigned int primID = morton[start+i].index; - const QuadMesh::Quad& tri = mesh->quad(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - const Vec3fa& p3 = mesh->vertex(tri.v[3]); - lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3); - upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3); - vgeomID [i] = geomID_; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z; - } - Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID)); - BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper); -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = current.size(); -#endif - return NodeRecord(ref,box_o); - } - private: - QuadMesh* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<int N> - struct CreateMortonLeaf<N,Object> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - - /* allocate leaf node */ - Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,items); - const UserGeometry* mesh = this->mesh; - - BBox3fa bounds = empty; - for (size_t i=0; i<items; i++) - { - const unsigned int index = morton[start+i].index; - const unsigned int primID = index; - bounds.extend(mesh->bounds(primID)); - new (&accel[i]) Object(geomID_,primID); - } - - BBox3fx box_o = (BBox3fx&)bounds; -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = current.size(); -#endif - return NodeRecord(ref,box_o); - } - private: - UserGeometry* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<int N> - struct CreateMortonLeaf<N,InstancePrimitive> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - __forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton) - : mesh(mesh), morton(morton), geomID_(geomID) {} - - __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) - { - vfloat4 lower(pos_inf); - vfloat4 upper(neg_inf); - size_t items = current.size(); - size_t start = current.begin(); - assert(items <= 1); - - /* allocate leaf node */ - InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment); - NodeRef ref = BVH::encodeLeaf((char*)accel,items); - const Instance* instance = this->mesh; - - BBox3fa bounds = empty; - for (size_t i=0; i<items; i++) - { - const unsigned int primID = morton[start+i].index; - bounds.extend(instance->bounds(primID)); - new (&accel[i]) InstancePrimitive(instance, geomID_); - } - - BBox3fx box_o = (BBox3fx&)bounds; -#if ROTATE_TREE - if (N == 4) - box_o.lower.a = current.size(); -#endif - return NodeRecord(ref,box_o); - } - private: - Instance* mesh; - BVHBuilderMorton::BuildPrim* morton; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - }; - - template<typename Mesh> - struct CalculateMeshBounds - { - __forceinline CalculateMeshBounds (Mesh* mesh) - : mesh(mesh) {} - - __forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) { - return mesh->bounds(morton.index); - } - - private: - Mesh* mesh; - }; - - template<int N, typename Mesh, typename Primitive> - class BVHNMeshBuilderMorton : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecord NodeRecord; - - public: - - BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD) - : bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {} - - /* build function */ - void build() - { - /* we reset the allocator when the mesh size changed */ - if (mesh->numPrimitives != numPreviousPrimitives) { - bvh->alloc.clear(); - morton.clear(); - } - size_t numPrimitives = mesh->size(); - numPreviousPrimitives = numPrimitives; - - /* skip build for empty scene */ - if (numPrimitives == 0) { - bvh->set(BVH::emptyNode,empty,0); - return; - } - - /* preallocate arrays */ - morton.resize(numPrimitives); - size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive)); - size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim); - bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes - bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated); - - /* create morton code array */ - BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes); - size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface); - - /* create BVH */ - SetBVHNBounds<N> setBounds(bvh); - CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data()); - CalculateMeshBounds<Mesh> calculateBounds(mesh); - auto root = BVHBuilderMorton::build<NodeRecord>( - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNode::Create(), - setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface, - morton.data(),dest,numPrimitivesGen,settings); - - bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives); - -#if ROTATE_TREE - if (N == 4) - { - for (int i=0; i<ROTATE_TREE; i++) - BVHNRotate<N>::rotate(bvh->root); - bvh->clearBarrier(bvh->root); - } -#endif - - /* clear temporary data for static geometry */ - if (bvh->scene->isStaticAccel()) { - morton.clear(); - } - bvh->cleanup(); - } - - void clear() { - morton.clear(); - } - - private: - BVH* bvh; - Mesh* mesh; - mvector<BVHBuilderMorton::BuildPrim> morton; - BVHBuilderMorton::Settings settings; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - unsigned int numPreviousPrimitives = 0; - }; - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); } - Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); } - Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); } -#if defined(__AVX__) - Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); } - Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); } - Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); } -#if defined(__AVX__) - Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_USER) - Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); } -#if defined(__AVX__) - Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); } -#if defined(__AVX__) - Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); } -#endif -#endif - - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp deleted file mode 100644 index cf5b2eb47f..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "bvh_builder.h" -#include "../builders/primrefgen.h" -#include "../builders/splitter.h" - -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglev_mb.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" -#include "../geometry/subgrid.h" - -#include "../common/state.h" -#include "../../common/algorithms/parallel_for_for.h" -#include "../../common/algorithms/parallel_for_for_prefix_sum.h" - -#define PROFILE 0 -#define PROFILE_RUNS 20 - -namespace embree -{ - namespace isa - { - template<int N, typename Primitive> - struct CreateLeaf - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - - __forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {} - - __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - size_t n = set.size(); - size_t items = Primitive::blocks(n); - size_t start = set.begin(); - Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment); - typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items); - for (size_t i=0; i<items; i++) { - accel[i].fill(prims,start,set.end(),bvh->scene); - } - return node; - } - - BVH* bvh; - }; - - - template<int N, typename Primitive> - struct CreateLeafQuantized - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - - __forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {} - - __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - size_t n = set.size(); - size_t items = Primitive::blocks(n); - size_t start = set.begin(); - Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment); - typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items); - for (size_t i=0; i<items; i++) { - accel[i].fill(prims,start,set.end(),bvh->scene); - } - return node; - } - - BVH* bvh; - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - template<int N, typename Primitive> - struct BVHNBuilderSAH : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVHN<N>::NodeRef NodeRef; - - BVH* bvh; - Scene* scene; - Geometry* mesh; - mvector<PrimRef> prims; - GeneralBVHBuilder::Settings settings; - Geometry::GTypeMask gtype_; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max (); - bool primrefarrayalloc; - unsigned int numPreviousPrimitives = 0; - - BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, - const Geometry::GTypeMask gtype, bool primrefarrayalloc = false) - : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), - settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {} - - BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype) - : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {} - - // FIXME: shrink bvh->alloc in destructor here and in other builders too - - void build() - { - /* we reset the allocator when the mesh size changed */ - if (mesh && mesh->numPrimitives != numPreviousPrimitives) { - bvh->alloc.clear(); - } - - /* if we use the primrefarray for allocations we have to take it back from the BVH */ - if (settings.primrefarrayalloc != size_t(inf)) - bvh->alloc.unshare(prims); - - /* skip build for empty scene */ - const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false); - numPreviousPrimitives = numPrimitives; - if (numPrimitives == 0) { - bvh->clear(); - prims.clear(); - return; - } - - double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH"); - -#if PROFILE - profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) { -#endif - - /* create primref array */ - if (primrefarrayalloc) { - settings.primrefarrayalloc = numPrimitives/1000; - if (settings.primrefarrayalloc < 1000) - settings.primrefarrayalloc = inf; - } - - /* enable os_malloc for two level build */ - if (mesh) - bvh->alloc.setOSallocation(true); - - /* initialize allocator */ - const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes); - prims.resize(numPrimitives); - - PrimInfo pinfo = mesh ? - createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) : - createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface); - - /* pinfo might has zero size due to invalid geometry */ - if (unlikely(pinfo.size() == 0)) - { - bvh->clear(); - prims.clear(); - return; - } - - /* call BVH builder */ - NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings); - bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size()); - bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f)); - -#if PROFILE - }); -#endif - - /* if we allocated using the primrefarray we have to keep it alive */ - if (settings.primrefarrayalloc != size_t(inf)) - bvh->alloc.share(prims); - - /* for static geometries we can do some cleanups */ - else if (scene && scene->isStaticAccel()) { - prims.clear(); - } - bvh->cleanup(); - bvh->postBuild(t0); - } - - void clear() { - prims.clear(); - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - template<int N, typename Primitive> - struct BVHNBuilderSAHQuantized : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVHN<N>::NodeRef NodeRef; - - BVH* bvh; - Scene* scene; - Geometry* mesh; - mvector<PrimRef> prims; - GeneralBVHBuilder::Settings settings; - Geometry::GTypeMask gtype_; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - unsigned int numPreviousPrimitives = 0; - - BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype) - : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {} - - BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype) - : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {} - - // FIXME: shrink bvh->alloc in destructor here and in other builders too - - void build() - { - /* we reset the allocator when the mesh size changed */ - if (mesh && mesh->numPrimitives != numPreviousPrimitives) { - bvh->alloc.clear(); - } - - /* skip build for empty scene */ - const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false); - numPreviousPrimitives = numPrimitives; - if (numPrimitives == 0) { - prims.clear(); - bvh->clear(); - return; - } - - double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH"); - -#if PROFILE - profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) { -#endif - /* create primref array */ - prims.resize(numPrimitives); - PrimInfo pinfo = mesh ? - createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) : - createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface); - - /* enable os_malloc for two level build */ - if (mesh) - bvh->alloc.setOSallocation(true); - - /* call BVH builder */ - const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes); - NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings); - bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size()); - //bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!! -#if PROFILE - }); -#endif - - /* clear temporary data for static geometry */ - if (scene && scene->isStaticAccel()) { - prims.clear(); - } - bvh->cleanup(); - bvh->postBuild(t0); - } - - void clear() { - prims.clear(); - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - - template<int N, typename Primitive> - struct CreateLeafGrid - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - - __forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {} - - __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - const size_t items = set.size(); //Primitive::blocks(n); - const size_t start = set.begin(); - - /* collect all subsets with unique geomIDs */ - assert(items <= N); - unsigned int geomIDs[N]; - unsigned int num_geomIDs = 1; - geomIDs[0] = prims[start].geomID(); - - for (size_t i=1;i<items;i++) - { - bool found = false; - const unsigned int new_geomID = prims[start+i].geomID(); - for (size_t j=0;j<num_geomIDs;j++) - if (new_geomID == geomIDs[j]) - { found = true; break; } - if (!found) - geomIDs[num_geomIDs++] = new_geomID; - } - - /* allocate all leaf memory in one single block */ - SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridQBVHN<N>),BVH::byteAlignment); - typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,num_geomIDs); - - for (size_t g=0;g<num_geomIDs;g++) - { - unsigned int x[N]; - unsigned int y[N]; - unsigned int primID[N]; - BBox3fa bounds[N]; - unsigned int pos = 0; - for (size_t i=0;i<items;i++) - { - if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue; - - const SubGridBuildData& sgrid_bd = sgrids[prims[start+i].primID()]; - x[pos] = sgrid_bd.sx; - y[pos] = sgrid_bd.sy; - primID[pos] = sgrid_bd.primID; - bounds[pos] = prims[start+i].bounds(); - pos++; - } - assert(pos <= N); - new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,geomIDs[g],pos); - } - - return node; - } - - BVH* bvh; - const SubGridBuildData * const sgrids; - }; - - - template<int N> - struct BVHNBuilderSAHGrid : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVHN<N>::NodeRef NodeRef; - - BVH* bvh; - Scene* scene; - GridMesh* mesh; - mvector<PrimRef> prims; - mvector<SubGridBuildData> sgrids; - GeneralBVHBuilder::Settings settings; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - unsigned int numPreviousPrimitives = 0; - - BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode) - : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {} - - BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode) - : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {} - - void build() - { - /* we reset the allocator when the mesh size changed */ - if (mesh && mesh->numPrimitives != numPreviousPrimitives) { - bvh->alloc.clear(); - } - - /* if we use the primrefarray for allocations we have to take it back from the BVH */ - if (settings.primrefarrayalloc != size_t(inf)) - bvh->alloc.unshare(prims); - - const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false); - numPreviousPrimitives = numGridPrimitives; - - PrimInfo pinfo(empty); - size_t numPrimitives = 0; - - if (!mesh) - { - /* first run to get #primitives */ - - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator<GridMesh,false> iter(scene); - - pstate.init(iter,size_t(1024)); - - /* iterate over all meshes in the scene */ - pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - BBox3fa bounds = empty; - const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j); - if (!mesh->valid(j)) continue; - pinfo.add_center2(prim,mesh->getNumSubGrids(j)); - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - numPrimitives = pinfo.size(); - - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - k = base.size(); - size_t p_index = k; - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - const GridMesh::Grid &g = mesh->grid(j); - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid - const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index); - pinfo.add_center2(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - assert(pinfo.size() == numPrimitives); - } - else - { - ParallelPrefixSumState<PrimInfo> pstate; - /* iterate over all grids in a single mesh */ - pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - BBox3fa bounds = empty; - const PrimRef prim(bounds,geomID_,unsigned(j)); - pinfo.add_center2(prim,mesh->getNumSubGrids(j)); - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - numPrimitives = pinfo.size(); - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo - { - - size_t p_index = base.size(); - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - const GridMesh::Grid &g = mesh->grid(j); - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid - const PrimRef prim(bounds,geomID_,unsigned(p_index)); - pinfo.add_center2(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - } - - /* no primitives */ - if (numPrimitives == 0) { - bvh->clear(); - prims.clear(); - sgrids.clear(); - return; - } - - double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH"); - - /* create primref array */ - settings.primrefarrayalloc = numPrimitives/1000; - if (settings.primrefarrayalloc < 1000) - settings.primrefarrayalloc = inf; - - /* enable os_malloc for two level build */ - if (mesh) - bvh->alloc.setOSallocation(true); - - /* initialize allocator */ - const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N); - const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>)); - - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes); - - /* pinfo might has zero size due to invalid geometry */ - if (unlikely(pinfo.size() == 0)) - { - bvh->clear(); - sgrids.clear(); - prims.clear(); - return; - } - - /* call BVH builder */ - NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings); - bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size()); - bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f)); - - /* clear temporary array */ - sgrids.clear(); - - /* if we allocated using the primrefarray we have to keep it alive */ - if (settings.primrefarrayalloc != size_t(inf)) - bvh->alloc.share(prims); - - /* for static geometries we can do some cleanups */ - else if (scene && scene->isStaticAccel()) { - prims.clear(); - } - bvh->cleanup(); - bvh->postBuild(t0); - } - - void clear() { - prims.clear(); - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - - Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); } - - - Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } -#if defined(__AVX__) - Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } - - Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); } - Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } - -#endif -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); } - Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - -#if defined(__AVX__) - Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); } - Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); } - Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); } - -#endif -#endif - -#if defined(EMBREE_GEOMETRY_USER) - - Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { - int minLeafSize = scene->device->object_accel_min_leaf_size; - int maxLeafSize = scene->device->object_accel_max_leaf_size; - return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type); - } - - Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { - return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type); - } -#if defined(__AVX__) - - Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { - int minLeafSize = scene->device->object_accel_min_leaf_size; - int maxLeafSize = scene->device->object_accel_max_leaf_size; - return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type); - } - - Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { - return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type); - } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); } - Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { - return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype); - } -#if defined(__AVX__) - Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); } - Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { - return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,gtype); - } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_GRID) - Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); } - Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct - -#if defined(__AVX__) - Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); } - Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct -#endif -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp deleted file mode 100644 index 9c01553ec6..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp +++ /dev/null @@ -1,705 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "bvh_builder.h" -#include "../builders/bvh_builder_msmblur.h" - -#include "../builders/primrefgen.h" -#include "../builders/splitter.h" - -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglev_mb.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" -#include "../geometry/subgrid.h" - -#include "../common/state.h" - -// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH -#include "../../common/algorithms/parallel_for_for.h" -#include "../../common/algorithms/parallel_for_for_prefix_sum.h" - - -namespace embree -{ - namespace isa - { - -#if 0 - template<int N, typename Primitive> - struct CreateMBlurLeaf - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecordMB NodeRecordMB; - - __forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {} - - __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - size_t items = Primitive::blocks(set.size()); - size_t start = set.begin(); - for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical - Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment); - NodeRef node = bvh->encodeLeaf((char*)accel,items); - - LBBox3fa allBounds = empty; - for (size_t i=0; i<items; i++) - allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time)); - - return NodeRecordMB(node,allBounds); - } - - BVH* bvh; - PrimRef* prims; - size_t time; - }; -#endif - - template<int N, typename Mesh, typename Primitive> - struct CreateMSMBlurLeaf - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecordMB4D NodeRecordMB4D; - - __forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {} - - __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const - { - size_t items = Primitive::blocks(current.prims.size()); - size_t start = current.prims.begin(); - size_t end = current.prims.end(); - for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical - Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteNodeAlignment); - NodeRef node = bvh->encodeLeaf((char*)accel,items); - LBBox3fa allBounds = empty; - for (size_t i=0; i<items; i++) - allBounds.extend(accel[i].fillMB(current.prims.prims->data(), start, current.prims.end(), bvh->scene, current.prims.time_range)); - return NodeRecordMB4D(node,allBounds,current.prims.time_range); - } - - BVH* bvh; - }; - - /* Motion blur BVH with 4D nodes and internal time splits */ - template<int N, typename Mesh, typename Primitive> - struct BVHNBuilderMBlurSAH : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVHN<N>::NodeRef NodeRef; - typedef typename BVHN<N>::NodeRecordMB NodeRecordMB; - typedef typename BVHN<N>::AABBNodeMB AABBNodeMB; - - BVH* bvh; - Scene* scene; - const size_t sahBlockSize; - const float intCost; - const size_t minLeafSize; - const size_t maxLeafSize; - const Geometry::GTypeMask gtype_; - - BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype) - : bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {} - - void build() - { - /* skip build for empty scene */ - const size_t numPrimitives = scene->getNumPrimitives(gtype_,true); - if (numPrimitives == 0) { bvh->clear(); return; } - - double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH"); - -#if PROFILE - profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) { -#endif - - //const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>(); - //const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1); - - /*if (numTimeSegments == 1) - buildSingleSegment(numPrimitives); - else*/ - buildMultiSegment(numPrimitives); - -#if PROFILE - }); -#endif - - /* clear temporary data for static geometry */ - bvh->cleanup(); - bvh->postBuild(t0); - } - -#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf. - void buildSingleSegment(size_t numPrimitives) - { - /* create primref array */ - mvector<PrimRef> prims(scene->device,numPrimitives); - const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,prims,bvh->scene->progressInterface,0); - /* early out if no valid primitives */ - if (pinfo.size() == 0) { bvh->clear(); return; } - /* estimate acceleration structure size */ - const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - - /* settings for BVH build */ - GeneralBVHBuilder::Settings settings; - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - settings.logBlockSize = bsr(sahBlockSize); - settings.minLeafSize = min(minLeafSize,maxLeafSize); - settings.maxLeafSize = maxLeafSize; - settings.travCost = travCost; - settings.intCost = intCost; - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - /* build hierarchy */ - auto root = BVHBuilderBinnedSAH::build<NodeRecordMB> - (typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(), - CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface, - prims.data(),pinfo,settings); - - bvh->set(root.ref,root.lbounds,pinfo.size()); - } -#endif - - void buildMultiSegment(size_t numPrimitives) - { - /* create primref array */ - mvector<PrimRefMB> prims(scene->device,numPrimitives); - PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,prims,bvh->scene->progressInterface); - - /* early out if no valid primitives */ - if (pinfo.size() == 0) { bvh->clear(); return; } - - /* estimate acceleration structure size */ - const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - - /* settings for BVH build */ - BVHBuilderMSMBlur::Settings settings; - settings.branchingFactor = N; - settings.maxDepth = BVH::maxDepth; - settings.logBlockSize = bsr(sahBlockSize); - settings.minLeafSize = min(minLeafSize,maxLeafSize); - settings.maxLeafSize = maxLeafSize; - settings.travCost = travCost; - settings.intCost = intCost; - settings.singleLeafTimeSegment = Primitive::singleTimeSegment; - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - /* build hierarchy */ - auto root = - BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device, - RecalculatePrimRef<Mesh>(scene), - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNodeMB4D::Create(), - typename BVH::AABBNodeMB4D::Set(), - CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh), - bvh->scene->progressInterface, - settings); - - bvh->set(root.ref,root.lbounds,pinfo.num_time_segments); - } - - void clear() { - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - struct GridRecalculatePrimRef - { - Scene* scene; - const SubGridBuildData * const sgrids; - - __forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids) - : scene(scene), sgrids(sgrids) {} - - __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const - { - const unsigned int geomID = prim.geomID(); - const GridMesh* mesh = scene->get<GridMesh>(geomID); - const unsigned int buildID = prim.primID(); - const SubGridBuildData &subgrid = sgrids[buildID]; - const unsigned int primID = subgrid.primID; - const size_t x = subgrid.x(); - const size_t y = subgrid.y(); - const LBBox3fa lbounds = mesh->linearBounds(mesh->grid(primID),x,y,time_range); - const unsigned num_time_segments = mesh->numTimeSegments(); - const range<int> tbounds = mesh->timeSegmentRange(time_range); - return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID); - } - - __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const { - const unsigned int geomID = prim.geomID(); - const GridMesh* mesh = scene->get<GridMesh>(geomID); - const unsigned int buildID = prim.primID(); - const SubGridBuildData &subgrid = sgrids[buildID]; - const unsigned int primID = subgrid.primID; - const size_t x = subgrid.x(); - const size_t y = subgrid.y(); - return mesh->linearBounds(mesh->grid(primID),x,y,time_range); - } - - }; - - template<int N> - struct CreateMSMBlurLeafGrid - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecordMB4D NodeRecordMB4D; - - __forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) : scene(scene), bvh(bvh), sgrids(sgrids) {} - - __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const - { - const size_t items = current.prims.size(); - const size_t start = current.prims.begin(); - - const PrimRefMB* prims = current.prims.prims->data(); - /* collect all subsets with unique geomIDs */ - assert(items <= N); - unsigned int geomIDs[N]; - unsigned int num_geomIDs = 1; - geomIDs[0] = prims[start].geomID(); - - for (size_t i=1;i<items;i++) - { - bool found = false; - const unsigned int new_geomID = prims[start+i].geomID(); - for (size_t j=0;j<num_geomIDs;j++) - if (new_geomID == geomIDs[j]) - { found = true; break; } - if (!found) - geomIDs[num_geomIDs++] = new_geomID; - } - - /* allocate all leaf memory in one single block */ - SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment); - typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs); - - LBBox3fa allBounds = empty; - - for (size_t g=0;g<num_geomIDs;g++) - { - const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]); - unsigned int x[N]; - unsigned int y[N]; - unsigned int primID[N]; - BBox3fa bounds0[N]; - BBox3fa bounds1[N]; - unsigned int pos = 0; - for (size_t i=0;i<items;i++) - { - if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue; - - const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()]; - x[pos] = sgrid_bd.sx; - y[pos] = sgrid_bd.sy; - primID[pos] = sgrid_bd.primID; - const size_t x = sgrid_bd.x(); - const size_t y = sgrid_bd.y(); - LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),x,y,current.prims.time_range); - allBounds.extend(newBounds); - bounds0[pos] = newBounds.bounds0; - bounds1[pos] = newBounds.bounds1; - pos++; - } - assert(pos <= N); - new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos); - } - return NodeRecordMB4D(node,allBounds,current.prims.time_range); - } - - Scene *scene; - BVH* bvh; - const SubGridBuildData * const sgrids; - }; - -#if 0 - template<int N> - struct CreateLeafGridMB - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::NodeRecordMB NodeRecordMB; - - __forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) - : scene(scene), bvh(bvh), sgrids(sgrids) {} - - __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - const size_t items = set.size(); - const size_t start = set.begin(); - - /* collect all subsets with unique geomIDs */ - assert(items <= N); - unsigned int geomIDs[N]; - unsigned int num_geomIDs = 1; - geomIDs[0] = prims[start].geomID(); - - for (size_t i=1;i<items;i++) - { - bool found = false; - const unsigned int new_geomID = prims[start+i].geomID(); - for (size_t j=0;j<num_geomIDs;j++) - if (new_geomID == geomIDs[j]) - { found = true; break; } - if (!found) - geomIDs[num_geomIDs++] = new_geomID; - } - - /* allocate all leaf memory in one single block */ - SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment); - typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs); - - LBBox3fa allBounds = empty; - - for (size_t g=0;g<num_geomIDs;g++) - { - const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]); - - unsigned int x[N]; - unsigned int y[N]; - unsigned int primID[N]; - BBox3fa bounds0[N]; - BBox3fa bounds1[N]; - unsigned int pos = 0; - for (size_t i=0;i<items;i++) - { - if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue; - - const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()]; - x[pos] = sgrid_bd.sx; - y[pos] = sgrid_bd.sy; - primID[pos] = sgrid_bd.primID; - const size_t x = sgrid_bd.x(); - const size_t y = sgrid_bd.y(); - bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]); - bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]); - assert(valid0); - assert(valid1); - allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos])); - pos++; - } - new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos); - } - return NodeRecordMB(node,allBounds); - } - - Scene *scene; - BVH* bvh; - const SubGridBuildData * const sgrids; - }; -#endif - - - /* Motion blur BVH with 4D nodes and internal time splits */ - template<int N> - struct BVHNBuilderMBlurSAHGrid : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVHN<N>::NodeRef NodeRef; - typedef typename BVHN<N>::NodeRecordMB NodeRecordMB; - typedef typename BVHN<N>::AABBNodeMB AABBNodeMB; - - BVH* bvh; - Scene* scene; - const size_t sahBlockSize; - const float intCost; - const size_t minLeafSize; - const size_t maxLeafSize; - mvector<SubGridBuildData> sgrids; - - - BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize) - : bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {} - - - PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime) - { - /* first run to get #primitives */ - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator<GridMesh,true> iter(scene); - - pstate.init(iter,size_t(1024)); - - /* iterate over all meshes in the scene */ - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j,range<size_t>(0,1))) continue; - BBox3fa bounds = empty; - const PrimRef prim(bounds,unsigned(geomID),unsigned(j)); - pinfo.add_center2(prim,mesh->getNumSubGrids(j)); - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - size_t numPrimitives = pinfo.size(); - if (numPrimitives == 0) return pinfo; - - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - - k = base.size(); - size_t p_index = k; - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - const GridMesh::Grid &g = mesh->grid(j); - if (!mesh->valid(j,range<size_t>(0,1))) continue; - - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid - const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index)); - pinfo.add_center2(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - assert(pinfo.size() == numPrimitives); - return pinfo; - } - - PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f)) - { - /* first run to get #primitives */ - ParallelForForPrefixSumState<PrimInfoMB> pstate; - Scene::Iterator<GridMesh,true> iter(scene); - - pstate.init(iter,size_t(1024)); - /* iterate over all meshes in the scene */ - PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB { - - PrimInfoMB pinfoMB(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue; - LBBox3fa bounds(empty); - PrimInfoMB gridMB(0,mesh->getNumSubGrids(j)); - pinfoMB.merge(gridMB); - } - return pinfoMB; - }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); }); - - size_t numPrimitives = pinfoMB.size(); - if (numPrimitives == 0) return pinfoMB; - - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB { - - k = base.size(); - size_t p_index = k; - PrimInfoMB pinfoMB(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue; - const GridMesh::Grid &g = mesh->grid(j); - - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index)); - pinfoMB.add_primref(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfoMB; - }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); }); - - assert(pinfoMB.size() == numPrimitives); - pinfoMB.time_range = t0t1; - return pinfoMB; - } - - void build() - { - /* skip build for empty scene */ - const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true); - if (numPrimitives == 0) { bvh->clear(); return; } - - double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid"); - - //const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>(); - //const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1); - //if (numTimeSegments == 1) - // buildSingleSegment(numPrimitives); - //else - buildMultiSegment(numPrimitives); - - /* clear temporary data for static geometry */ - bvh->cleanup(); - bvh->postBuild(t0); - } - -#if 0 - void buildSingleSegment(size_t numPrimitives) - { - /* create primref array */ - mvector<PrimRef> prims(scene->device,numPrimitives); - const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0); - /* early out if no valid primitives */ - if (pinfo.size() == 0) { bvh->clear(); return; } - - /* estimate acceleration structure size */ - const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N); - //TODO: check leaf_bytes - const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - - /* settings for BVH build */ - GeneralBVHBuilder::Settings settings; - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - settings.logBlockSize = bsr(sahBlockSize); - settings.minLeafSize = min(minLeafSize,maxLeafSize); - settings.maxLeafSize = maxLeafSize; - settings.travCost = travCost; - settings.intCost = intCost; - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - /* build hierarchy */ - auto root = BVHBuilderBinnedSAH::build<NodeRecordMB> - (typename BVH::CreateAlloc(bvh), - typename BVH::AABBNodeMB::Create(), - typename BVH::AABBNodeMB::Set(), - CreateLeafGridMB<N>(scene,bvh,sgrids.data()), - bvh->scene->progressInterface, - prims.data(),pinfo,settings); - - bvh->set(root.ref,root.lbounds,pinfo.size()); - } -#endif - - void buildMultiSegment(size_t numPrimitives) - { - /* create primref array */ - mvector<PrimRefMB> prims(scene->device,numPrimitives); - PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface); - - /* early out if no valid primitives */ - if (pinfo.size() == 0) { bvh->clear(); return; } - - - - GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data()); - - /* estimate acceleration structure size */ - const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N); - //FIXME: check leaf_bytes - //const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>)); - const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>)); - - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - - /* settings for BVH build */ - BVHBuilderMSMBlur::Settings settings; - settings.branchingFactor = N; - settings.maxDepth = BVH::maxDepth; - settings.logBlockSize = bsr(sahBlockSize); - settings.minLeafSize = min(minLeafSize,maxLeafSize); - settings.maxLeafSize = maxLeafSize; - settings.travCost = travCost; - settings.intCost = intCost; - settings.singleLeafTimeSegment = false; - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - /* build hierarchy */ - auto root = - BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device, - recalculatePrimRef, - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNodeMB4D::Create(), - typename BVH::AABBNodeMB4D::Set(), - CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()), - bvh->scene->progressInterface, - settings); - bvh->set(root.ref,root.lbounds,pinfo.num_time_segments); - } - - void clear() { - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); } - Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); } -#if defined(__AVX__) - Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); } - Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); } -#if defined(__AVX__) - Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_USER) - Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { - int minLeafSize = scene->device->object_accel_mb_min_leaf_size; - int maxLeafSize = scene->device->object_accel_mb_max_leaf_size; - return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY); - } -#if defined(__AVX__) - Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { - int minLeafSize = scene->device->object_accel_mb_min_leaf_size; - int maxLeafSize = scene->device->object_accel_mb_max_leaf_size; - return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY); - } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); } -#if defined(__AVX__) - Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_GRID) - Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); } -#if defined(__AVX__) - Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8); } -#endif -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp deleted file mode 100644 index 285b38c39d..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh.h" -#include "bvh_builder.h" - -#include "../builders/primrefgen.h" -#include "../builders/primrefgen_presplit.h" -#include "../builders/splitter.h" - -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglev_mb.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" -#include "../geometry/subgrid.h" - -#include "../common/state.h" - -namespace embree -{ - namespace isa - { - template<int N, typename Primitive> - struct CreateLeafSpatial - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - - __forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {} - - __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const - { - size_t n = set.size(); - size_t items = Primitive::blocks(n); - size_t start = set.begin(); - Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment); - typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items); - for (size_t i=0; i<items; i++) { - accel[i].fill(prims,start,set.end(),bvh->scene); - } - return node; - } - - BVH* bvh; - }; - - template<int N, typename Mesh, typename Primitive, typename Splitter> - struct BVHNBuilderFastSpatialSAH : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - BVH* bvh; - Scene* scene; - Mesh* mesh; - mvector<PrimRef> prims0; - GeneralBVHBuilder::Settings settings; - const float splitFactor; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); - unsigned int numPreviousPrimitives = 0; - - BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode) - : bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), - splitFactor(scene->device->max_spatial_split_replications) {} - - BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode) - : bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), - splitFactor(scene->device->max_spatial_split_replications), geomID_(geomID) {} - - // FIXME: shrink bvh->alloc in destructor here and in other builders too - - void build() - { - /* we reset the allocator when the mesh size changed */ - if (mesh && mesh->numPrimitives != numPreviousPrimitives) { - bvh->alloc.clear(); - } - - /* skip build for empty scene */ - const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false); - numPreviousPrimitives = numOriginalPrimitives; - if (numOriginalPrimitives == 0) { - prims0.clear(); - bvh->clear(); - return; - } - - const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>(); - const bool usePreSplits = scene->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))); - double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH")); - - /* create primref array */ - const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives)); - prims0.resize(numSplitPrimitives); - - /* enable os_malloc for two level build */ - if (mesh) - bvh->alloc.setOSallocation(true); - - NodeRef root(0); - PrimInfo pinfo; - - - if (likely(usePreSplits)) - { - /* spatial presplit SAH BVH builder */ - pinfo = mesh ? - createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) : - createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface); - - const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - - /* call BVH builder */ - root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings); - } - else - { - /* standard spatial split SAH BVH builder */ - pinfo = mesh ? - createPrimRefArray(mesh,geomID_,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface) : - createPrimRefArray(scene,Mesh::geom_type,false,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface); - - Splitter splitter(scene); - - const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N); - const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes); - - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - - /* call BVH builder */ - root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>( - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNode::Create2(), - typename BVH::AABBNode::Set2(), - CreateLeafSpatial<N,Primitive>(bvh), - splitter, - bvh->scene->progressInterface, - prims0.data(), - numSplitPrimitives, - pinfo,settings); - - /* ==================== */ - } - - bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size()); - bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f)); - - /* clear temporary data for static geometry */ - if (scene && scene->isStaticAccel()) { - prims0.clear(); - } - bvh->cleanup(); - bvh->postBuild(t0); - } - - void clear() { - prims0.clear(); - } - }; - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - - Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - -#if defined(__AVX__) - Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); } - Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - -#if defined(__AVX__) - Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); } -#endif - -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp deleted file mode 100644 index 1a78f347ac..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_builder_twolevel.h" -#include "bvh_statistics.h" -#include "../builders/bvh_builder_sah.h" -#include "../common/scene_line_segments.h" -#include "../common/scene_triangle_mesh.h" -#include "../common/scene_quad_mesh.h" - -#define PROFILE 0 - -namespace embree -{ - namespace isa - { - template<int N, typename Mesh, typename Primitive> - BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold) - : bvh(bvh), scene(scene), refs(scene->device,0), prims(scene->device,0), singleThreadThreshold(singleThreadThreshold), gtype(gtype), useMortonBuilder_(useMortonBuilder) {} - - template<int N, typename Mesh, typename Primitive> - BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () { - } - - // =========================================================================== - // =========================================================================== - // =========================================================================== - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build() - { - /* delete some objects */ - size_t num = scene->size(); - if (num < bvh->objects.size()) { - parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - builders[i].reset(); - delete bvh->objects[i]; bvh->objects[i] = nullptr; - } - }); - } - -#if PROFILE - while(1) -#endif - { - /* reset memory allocator */ - bvh->alloc.reset(); - - /* skip build for empty scene */ - const size_t numPrimitives = scene->getNumPrimitives(gtype,false); - - if (numPrimitives == 0) { - prims.resize(0); - bvh->set(BVH::emptyNode,empty,0); - return; - } - - /* calculate the size of the entire BVH */ - const size_t numLeafBlocks = Primitive::blocks(numPrimitives); - const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N; - const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive)); - bvh->alloc.init_estimate(node_bytes+leaf_bytes); - - double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel"); - - /* resize object array if scene got larger */ - if (bvh->objects.size() < num) bvh->objects.resize(num); - if (builders.size() < num) builders.resize(num); - resizeRefsList (); - nextRef.store(0); - - /* create acceleration structures */ - parallel_for(size_t(0), num, [&] (const range<size_t>& r) - { - for (size_t objectID=r.begin(); objectID<r.end(); objectID++) - { - Mesh* mesh = scene->getSafe<Mesh>(objectID); - - /* ignore meshes we do not support */ - if (mesh == nullptr || mesh->numTimeSteps != 1) - continue; - - if (isSmallGeometry(mesh)) { - setupSmallBuildRefBuilder (objectID, mesh); - } else { - setupLargeBuildRefBuilder (objectID, mesh); - } - } - }); - - /* parallel build of acceleration structures */ - parallel_for(size_t(0), num, [&] (const range<size_t>& r) - { - for (size_t objectID=r.begin(); objectID<r.end(); objectID++) - { - /* ignore if no triangle mesh or not enabled */ - Mesh* mesh = scene->getSafe<Mesh>(objectID); - if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1) - continue; - - builders[objectID]->attachBuildRefs (this); - } - }); - - -#if PROFILE - double d0 = getSeconds(); -#endif - /* fast path for single geometry scenes */ - if (nextRef == 1) { - bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives); - } - - else - { - /* open all large nodes */ - refs.resize(nextRef); - - /* this probably needs some more tuning */ - const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR)); - -#if !ENABLE_DIRECT_SAH_MERGE_BUILDER - -#if ENABLE_OPEN_SEQUENTIAL - open_sequential(extSize); -#endif - /* compute PrimRefs */ - prims.resize(refs.size()); -#endif - -#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL - tbb::task_arena limited(min(32,(int)TaskScheduler::threadCount())); - limited.execute([&] -#endif - { -#if ENABLE_DIRECT_SAH_MERGE_BUILDER - - const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo { - - PrimInfo pinfo(empty); - for (size_t i=r.begin(); i<r.end(); i++) { - pinfo.add_center2(refs[i]); - } - return pinfo; - }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); }); - -#else - const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo { - - PrimInfo pinfo(empty); - for (size_t i=r.begin(); i<r.end(); i++) { - pinfo.add_center2(refs[i]); - prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node); - } - return pinfo; - }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); }); -#endif - - /* skip if all objects where empty */ - if (pinfo.size() == 0) - bvh->set(BVH::emptyNode,empty,0); - - /* otherwise build toplevel hierarchy */ - else - { - /* settings for BVH build */ - GeneralBVHBuilder::Settings settings; - settings.branchingFactor = N; - settings.maxDepth = BVH::maxBuildDepthLeaf; - settings.logBlockSize = bsr(N); - settings.minLeafSize = 1; - settings.maxLeafSize = 1; - settings.travCost = 1.0f; - settings.intCost = 1.0f; - settings.singleThreadThreshold = singleThreadThreshold; - -#if ENABLE_DIRECT_SAH_MERGE_BUILDER - - refs.resize(extSize); - - NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>( - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNode::Create2(), - typename BVH::AABBNode::Set2(), - - [&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef { - assert(range.size() == 1); - return (NodeRef) refs[range.begin()].node; - }, - [&] (BuildRef &bref, BuildRef *refs) -> size_t { - return openBuildRef(bref,refs); - }, - [&] (size_t dn) { bvh->scene->progressMonitor(0); }, - refs.data(),extSize,pinfo,settings); -#else - NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>( - typename BVH::CreateAlloc(bvh), - typename BVH::AABBNode::Create2(), - typename BVH::AABBNode::Set2(), - - [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef { - assert(range.size() == 1); - return (NodeRef) prims[range.begin()].ID(); - }, - [&] (size_t dn) { bvh->scene->progressMonitor(0); }, - prims.data(),pinfo,settings); -#endif - - - bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives); - } - } -#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL - ); -#endif - - } - - bvh->alloc.cleanup(); - bvh->postBuild(t0); -#if PROFILE - double d1 = getSeconds(); - std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl; -#endif - } - - } - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID) - { - if (geomID >= bvh->objects.size()) return; - if (builders[geomID]) builders[geomID].reset(); - delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr; - } - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear() - { - for (size_t i=0; i<bvh->objects.size(); i++) - if (bvh->objects[i]) bvh->objects[i]->clear(); - - for (size_t i=0; i<builders.size(); i++) - if (builders[i]) builders[i].reset(); - - refs.clear(); - } - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize) - { - if (refs.size() == 0) - return; - - refs.reserve(extSize); - -#if 1 - for (size_t i=0;i<refs.size();i++) - { - NodeRef ref = refs[i].node; - if (ref.isAABBNode()) - BVH::prefetch(ref); - } -#endif - - std::make_heap(refs.begin(),refs.end()); - while (refs.size()+N-1 <= extSize) - { - std::pop_heap (refs.begin(),refs.end()); - NodeRef ref = refs.back().node; - if (ref.isLeaf()) break; - refs.pop_back(); - - AABBNode* node = ref.getAABBNode(); - for (size_t i=0; i<N; i++) { - if (node->child(i) == BVH::emptyNode) continue; - refs.push_back(BuildRef(node->bounds(i),node->child(i))); - -#if 1 - NodeRef ref_pre = node->child(i); - if (ref_pre.isAABBNode()) - ref_pre.prefetch(); -#endif - std::push_heap (refs.begin(),refs.end()); - } - } - } - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/) - { - if (builders[objectID] == nullptr || // new mesh - dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change - { - builders[objectID].reset (new RefBuilderSmall(objectID)); - } - } - - template<int N, typename Mesh, typename Primitive> - void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh) - { - if (bvh->objects[objectID] == nullptr || // new mesh - builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality - dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change - { - Builder* builder = nullptr; - delete bvh->objects[objectID]; - createMeshAccel(objectID, builder); - builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality)); - } - } - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } - Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } - Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_USER) - Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder); - } -#endif - -#if defined(__AVX__) -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } - Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } - Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_USER) - Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder); - } -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) { - return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder); - } -#endif - -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h deleted file mode 100644 index 8f57c3b406..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include <type_traits> - -#include "bvh_builder_twolevel_internal.h" -#include "bvh.h" -#include "../common/primref.h" -#include "../builders/priminfo.h" -#include "../builders/primrefgen.h" - -/* new open/merge builder */ -#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1 -#define ENABLE_OPEN_SEQUENTIAL 0 -#define SPLIT_MEMORY_RESERVE_FACTOR 1000 -#define SPLIT_MEMORY_RESERVE_SCALE 2 -#define SPLIT_MIN_EXT_SPACE 1000 - -namespace embree -{ - namespace isa - { - template<int N, typename Mesh, typename Primitive> - class BVHNBuilderTwoLevel : public Builder - { - typedef BVHN<N> BVH; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::NodeRef NodeRef; - - __forceinline static bool isSmallGeometry(Mesh* mesh) { - return mesh->size() <= 4; - } - - public: - - typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder); - - struct BuildRef : public PrimRef - { - public: - __forceinline BuildRef () {} - - __forceinline BuildRef (const BBox3fa& bounds, NodeRef node) - : PrimRef(bounds,(size_t)node), node(node) - { - if (node.isLeaf()) - bounds_area = 0.0f; - else - bounds_area = area(this->bounds()); - } - - /* used by the open/merge bvh builder */ - __forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives) - : PrimRef(bounds,geomID,numPrimitives), node(node) - { - /* important for relative buildref ordering */ - if (node.isLeaf()) - bounds_area = 0.0f; - else - bounds_area = area(this->bounds()); - } - - __forceinline size_t size() const { - return primID(); - } - - friend bool operator< (const BuildRef& a, const BuildRef& b) { - return a.bounds_area < b.bounds_area; - } - - friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) { - return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }"; - } - - __forceinline unsigned int numPrimitives() const { return primID(); } - - public: - NodeRef node; - float bounds_area; - }; - - - __forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) { - if (bref.node.isLeaf()) - { - refs[0] = bref; - return 1; - } - NodeRef ref = bref.node; - unsigned int geomID = bref.geomID(); - unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1); - AABBNode* node = ref.getAABBNode(); - size_t n = 0; - for (size_t i=0; i<N; i++) { - if (node->child(i) == BVH::emptyNode) continue; - refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims); - n++; - } - assert(n > 1); - return n; - } - - /*! Constructor. */ - BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD); - - /*! Destructor */ - ~BVHNBuilderTwoLevel (); - - /*! builder entry point */ - void build(); - void deleteGeometry(size_t geomID); - void clear(); - - void open_sequential(const size_t extSize); - - private: - - class RefBuilderBase { - public: - virtual ~RefBuilderBase () {} - virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0; - virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0; - }; - - class RefBuilderSmall : public RefBuilderBase { - public: - - RefBuilderSmall (size_t objectID) - : objectID_ (objectID) {} - - void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) { - - Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_); - size_t meshSize = mesh->size(); - assert(isSmallGeometry(mesh)); - - mvector<PrimRef> prefs(topBuilder->scene->device, meshSize); - auto pinfo = createPrimRefArray(mesh,objectID_,prefs,topBuilder->bvh->scene->progressInterface); - - size_t begin=0; - while (begin < pinfo.size()) - { - Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment); - typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1); - accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene); - - /* create build primitive */ -#if ENABLE_DIRECT_SAH_MERGE_BUILDER - topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1); -#else - topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node); -#endif - } - assert(begin == pinfo.size()); - } - - bool meshQualityChanged (RTCBuildQuality /*currQuality*/) { - return false; - } - - size_t objectID_; - }; - - class RefBuilderLarge : public RefBuilderBase { - public: - - RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality) - : objectID_ (objectID), builder_ (builder), quality_ (quality) {} - - void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) - { - BVH* object = topBuilder->getBVH(objectID_); assert(object); - - /* build object if it got modified */ - if (topBuilder->isGeometryModified(objectID_)) - builder_->build(); - - /* create build primitive */ - if (!object->getBounds().empty()) - { -#if ENABLE_DIRECT_SAH_MERGE_BUILDER - Mesh* mesh = topBuilder->getMesh(objectID_); - topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size()); -#else - topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root); -#endif - } - } - - bool meshQualityChanged (RTCBuildQuality currQuality) { - return currQuality != quality_; - } - - private: - size_t objectID_; - Ref<Builder> builder_; - RTCBuildQuality quality_; - }; - - void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh); - void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh); - - BVH* getBVH (size_t objectID) { - return this->bvh->objects[objectID]; - } - Mesh* getMesh (size_t objectID) { - return this->scene->template getSafe<Mesh>(objectID); - } - bool isGeometryModified (size_t objectID) { - return this->scene->isGeometryModified(objectID); - } - - void resizeRefsList () - { - size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0), - [this](const range<size_t>& r)->size_t { - size_t c = 0; - for (auto i=r.begin(); i<r.end(); ++i) { - Mesh* mesh = scene->getSafe<Mesh>(i); - if (mesh == nullptr || mesh->numTimeSteps != 1) - continue; - size_t meshSize = mesh->size(); - c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1; - } - return c; - }, - std::plus<size_t>() - ); - - if (refs.size() < num) { - refs.resize(num); - } - } - - void createMeshAccel (size_t geomID, Builder*& builder) - { - bvh->objects[geomID] = new BVH(Primitive::type,scene); - BVH* accel = bvh->objects[geomID]; - auto mesh = scene->getSafe<Mesh>(geomID); - if (nullptr == mesh) { - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type"); - return; - } - - __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder); - } - - using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>; - - BuilderList builders; - BVH* bvh; - Scene* scene; - mvector<BuildRef> refs; - mvector<PrimRef> prims; - std::atomic<int> nextRef; - const size_t singleThreadThreshold; - Geometry::GTypeMask gtype; - bool useMortonBuilder_ = false; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h deleted file mode 100644 index 1c1ae8d6a7..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/quadi.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" - -namespace embree -{ - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t) - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t); - DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t) - - namespace isa - { - - namespace __internal_two_level_builder__ { - - template<int N, typename Mesh, typename Primitive> - struct MortonBuilder {}; - template<> - struct MortonBuilder<4,TriangleMesh,Triangle4> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<4,TriangleMesh,Triangle4v> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<4,TriangleMesh,Triangle4i> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<4,QuadMesh,Quad4v> { - MortonBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<4,UserGeometry,Object> { - MortonBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<4,Instance,InstancePrimitive> { - MortonBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);} - }; - template<> - struct MortonBuilder<8,TriangleMesh,Triangle4> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<8,TriangleMesh,Triangle4v> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<8,TriangleMesh,Triangle4i> { - MortonBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<8,QuadMesh,Quad4v> { - MortonBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<8,UserGeometry,Object> { - MortonBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);} - }; - template<> - struct MortonBuilder<8,Instance,InstancePrimitive> { - MortonBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);} - }; - - template<int N, typename Mesh, typename Primitive> - struct SAHBuilder {}; - template<> - struct SAHBuilder<4,TriangleMesh,Triangle4> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<4,TriangleMesh,Triangle4v> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<4,TriangleMesh,Triangle4i> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<4,QuadMesh,Quad4v> { - SAHBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<4,UserGeometry,Object> { - SAHBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<4,Instance,InstancePrimitive> { - SAHBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);} - }; - template<> - struct SAHBuilder<8,TriangleMesh,Triangle4> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<8,TriangleMesh,Triangle4v> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<8,TriangleMesh,Triangle4i> { - SAHBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<8,QuadMesh,Quad4v> { - SAHBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<8,UserGeometry,Object> { - SAHBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);} - }; - template<> - struct SAHBuilder<8,Instance,InstancePrimitive> { - SAHBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);} - }; - - template<int N, typename Mesh, typename Primitive> - struct RefitBuilder {}; - template<> - struct RefitBuilder<4,TriangleMesh,Triangle4> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<4,TriangleMesh,Triangle4v> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<4,TriangleMesh,Triangle4i> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<4,QuadMesh,Quad4v> { - RefitBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<4,UserGeometry,Object> { - RefitBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<4,Instance,InstancePrimitive> { - RefitBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);} - }; - template<> - struct RefitBuilder<8,TriangleMesh,Triangle4> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<8,TriangleMesh,Triangle4v> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<8,TriangleMesh,Triangle4i> { - RefitBuilder () {} - Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<8,QuadMesh,Quad4v> { - RefitBuilder () {} - Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<8,UserGeometry,Object> { - RefitBuilder () {} - Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);} - }; - template<> - struct RefitBuilder<8,Instance,InstancePrimitive> { - RefitBuilder () {} - Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);} - }; - - template<int N, typename Mesh, typename Primitive> - struct MeshBuilder { - MeshBuilder () {} - void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) { - if(useMortonBuilder) { - builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); - return; - } - switch (mesh->quality) { - case RTC_BUILD_QUALITY_LOW: builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break; - case RTC_BUILD_QUALITY_MEDIUM: - case RTC_BUILD_QUALITY_HIGH: builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break; - case RTC_BUILD_QUALITY_REFIT: builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break; - default: throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality"); - } - } - }; - } - } -}
\ No newline at end of file diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp deleted file mode 100644 index a27be8bae8..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_collider.h" -#include "../geometry/triangle_triangle_intersector.h" - -namespace embree -{ - namespace isa - { -#define CSTAT(x) - - size_t parallel_depth_threshold = 3; - CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0)); - CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0)); - CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0)); - CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0)); - - struct Collision - { - __forceinline Collision() {} - - __forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1) - : geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {} - - unsigned geomID0; - unsigned primID0; - unsigned geomID1; - unsigned primID1; - }; - - template<int N> - __forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1) - { - const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),node1.lower_x); - const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),node1.lower_y); - const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),node1.lower_z); - const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),node1.upper_x); - const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),node1.upper_y); - const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),node1.upper_z); - return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z)); - } - - template<int N> - __forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1) - { - const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),box1.lower.x); - const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),box1.lower.y); - const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),box1.lower.z); - const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),box1.upper.x); - const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),box1.upper.y); - const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),box1.upper.z); - return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z)); - } - - template<int N> - __forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1) - { - const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x); - const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y); - const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z); - const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x); - const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y); - const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z); - return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z)); - } - - bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1) - { - CSTAT(bvh_collide_prim_intersections1++); - const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0); - const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1); - const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0); - const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1); - - /* special culling for scene intersection with itself */ - if (scene0 == scene1 && geomID0 == geomID1) - { - /* ignore self intersections */ - if (primID0 == primID1) - return false; - } - CSTAT(bvh_collide_prim_intersections2++); - - if (scene0 == scene1 && geomID0 == geomID1) - { - /* ignore intersection with topological neighbors */ - const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]); - if (any(vint4(tri1.v[0]) == t0)) return false; - if (any(vint4(tri1.v[1]) == t0)) return false; - if (any(vint4(tri1.v[2]) == t0)) return false; - } - CSTAT(bvh_collide_prim_intersections3++); - - const Vec3fa a0 = mesh0->vertex(tri0.v[0]); - const Vec3fa a1 = mesh0->vertex(tri0.v[1]); - const Vec3fa a2 = mesh0->vertex(tri0.v[2]); - const Vec3fa b0 = mesh1->vertex(tri1.v[0]); - const Vec3fa b1 = mesh1->vertex(tri1.v[1]); - const Vec3fa b2 = mesh1->vertex(tri1.v[2]); - - return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2); - } - - template<int N> - __forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1) - { - Collision collisions[16]; - size_t num_collisions = 0; - - size_t N0; Object* leaf0 = (Object*) node0.leaf(N0); - size_t N1; Object* leaf1 = (Object*) node1.leaf(N1); - for (size_t i=0; i<N0; i++) { - for (size_t j=0; j<N1; j++) { - const unsigned geomID0 = leaf0[i].geomID(); - const unsigned primID0 = leaf0[i].primID(); - const unsigned geomID1 = leaf1[j].geomID(); - const unsigned primID1 = leaf1[j].primID(); - if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue; - collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1); - if (num_collisions == 16) { - this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions); - num_collisions = 0; - } - } - } - if (num_collisions) - this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions); - } - - template<int N> - void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1) - { - CSTAT(bvh_collide_traversal_steps++); - if (unlikely(ref0.isLeaf())) { - if (unlikely(ref1.isLeaf())) { - CSTAT(bvh_collide_leaf_pairs++); - processLeaf(ref0,ref1); - return; - } else goto recurse_node1; - - } else { - if (unlikely(ref1.isLeaf())) { - goto recurse_node0; - } else { - if (area(bounds0) > area(bounds1)) { - goto recurse_node0; - } - else { - goto recurse_node1; - } - } - } - - { - recurse_node0: - AABBNode* node0 = ref0.getAABBNode(); - size_t mask = overlap<N>(bounds1,*node0); - //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - //for (size_t i=0; i<N; i++) { -#if 0 - if (depth0 < parallel_depth_threshold) - { - parallel_for(size_t(N), [&] ( size_t i ) { - if (mask & ( 1 << i)) { - BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE); - collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1); - } - }); - } - else -#endif - { - for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE); - collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1); - } - } - return; - } - - { - recurse_node1: - AABBNode* node1 = ref1.getAABBNode(); - size_t mask = overlap<N>(bounds0,*node1); - //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - //for (size_t i=0; i<N; i++) { -#if 0 - if (depth1 < parallel_depth_threshold) - { - parallel_for(size_t(N), [&] ( size_t i ) { - if (mask & ( 1 << i)) { - BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE); - collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1); - } - }); - } - else -#endif - { - for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE); - collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1); - } - } - return; - } - } - - template<int N> - void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs) - { - if (unlikely(job.ref0.isLeaf())) { - if (unlikely(job.ref1.isLeaf())) { - jobs.push_back(job); - return; - } else goto recurse_node1; - } else { - if (unlikely(job.ref1.isLeaf())) { - goto recurse_node0; - } else { - if (area(job.bounds0) > area(job.bounds1)) { - goto recurse_node0; - } - else { - goto recurse_node1; - } - } - } - - { - recurse_node0: - const AABBNode* node0 = job.ref0.getAABBNode(); - size_t mask = overlap<N>(job.bounds1,*node0); - for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1)); - } - return; - } - - { - recurse_node1: - const AABBNode* node1 = job.ref1.getAABBNode(); - size_t mask = overlap<N>(job.bounds0,*node1); - for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) { - jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1)); - } - return; - } - } - - template<int N> - void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1) - { - CSTAT(bvh_collide_traversal_steps = 0); - CSTAT(bvh_collide_leaf_pairs = 0); - CSTAT(bvh_collide_leaf_iterations = 0); - CSTAT(bvh_collide_prim_intersections1 = 0); - CSTAT(bvh_collide_prim_intersections2 = 0); - CSTAT(bvh_collide_prim_intersections3 = 0); - CSTAT(bvh_collide_prim_intersections4 = 0); - CSTAT(bvh_collide_prim_intersections5 = 0); - CSTAT(bvh_collide_prim_intersections = 0); -#if 0 - collide_recurse(ref0,bounds0,ref1,bounds1,0,0); -#else - const int M = 2048; - jobvector jobs[2]; - jobs[0].reserve(M); - jobs[1].reserve(M); - jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0)); - int source = 0; - int target = 1; - - /* try to split job until job list is full */ - while (jobs[source].size()+8 <= M) - { - for (size_t i=0; i<jobs[source].size(); i++) - { - const CollideJob& job = jobs[source][i]; - size_t remaining = jobs[source].size()-i; - if (jobs[target].size()+remaining+8 > M) { - jobs[target].push_back(job); - } else { - split(job,jobs[target]); - } - } - - /* stop splitting jobs if we reached only leaves and cannot make progress anymore */ - if (jobs[target].size() == jobs[source].size()) - break; - - jobs[source].resize(0); - std::swap(source,target); - } - - /* parallel processing of all jobs */ - parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) { - CollideJob& j = jobs[source][i]; - collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1); - }); - - -#endif - CSTAT(PRINT(bvh_collide_traversal_steps)); - CSTAT(PRINT(bvh_collide_leaf_pairs)); - CSTAT(PRINT(bvh_collide_leaf_iterations)); - CSTAT(PRINT(bvh_collide_prim_intersections1)); - CSTAT(PRINT(bvh_collide_prim_intersections2)); - CSTAT(PRINT(bvh_collide_prim_intersections3)); - CSTAT(PRINT(bvh_collide_prim_intersections4)); - CSTAT(PRINT(bvh_collide_prim_intersections5)); - CSTAT(PRINT(bvh_collide_prim_intersections)); - } - - template<int N> - void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr) - { - BVHNColliderUserGeom<N>(bvh0->scene,bvh1->scene,callback,userPtr). - collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds()); - } - -#if defined (EMBREE_LOWEST_ISA) - struct collision_regression_test : public RegressionTest - { - collision_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f), - Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), - Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), - Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false; - passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), - Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false; - return passed; - } - }; - - collision_regression_test collision_regression("collision_regression_test"); -#endif - - //////////////////////////////////////////////////////////////////////////////// - /// Collider Definitions - //////////////////////////////////////////////////////////////////////////////// - - DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>); - -#if defined(__AVX__) - DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h deleted file mode 100644 index ac4f99c96a..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "../geometry/trianglev.h" -#include "../geometry/object.h" - -namespace embree -{ - namespace isa - { - template<int N> - class BVHNCollider - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::AABBNode AABBNode; - - struct CollideJob - { - CollideJob () {} - - CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0, - NodeRef ref1, const BBox3fa& bounds1, size_t depth1) - : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {} - - NodeRef ref0; - BBox3fa bounds0; - size_t depth0; - NodeRef ref1; - BBox3fa bounds1; - size_t depth1; - }; - - typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector; - - void split(const CollideJob& job, jobvector& jobs); - - public: - __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr) - : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {} - - public: - virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0; - void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1); - void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1); - - protected: - Scene* scene0; - Scene* scene1; - RTCCollideFunc callback; - void* userPtr; - }; - - template<int N> - class BVHNColliderUserGeom : public BVHNCollider<N> - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::AABBNode AABBNode; - - __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr) - : BVHNCollider<N>(scene0,scene1,callback,userPtr) {} - - virtual void processLeaf(NodeRef leaf0, NodeRef leaf1); - public: - static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr); - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h deleted file mode 100644 index 54021ca6eb..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../bvh/bvh.h" -#include "../common/isa.h" -#include "../common/accel.h" -#include "../common/scene.h" -#include "../geometry/curve_intersector_virtual.h" - -namespace embree -{ - /*! BVH instantiations */ - class BVHFactory - { - public: - enum class BuildVariant { STATIC, DYNAMIC, HIGH_QUALITY }; - enum class IntersectVariant { FAST, ROBUST }; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp deleted file mode 100644 index ea6adc2717..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp +++ /dev/null @@ -1,330 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_intersector1.h" -#include "node_intersector1.h" -#include "bvh_traverser1.h" - -#include "../geometry/intersector_iterators.h" -#include "../geometry/triangle_intersector.h" -#include "../geometry/trianglev_intersector.h" -#include "../geometry/trianglev_mb_intersector.h" -#include "../geometry/trianglei_intersector.h" -#include "../geometry/quadv_intersector.h" -#include "../geometry/quadi_intersector.h" -#include "../geometry/curveNv_intersector.h" -#include "../geometry/curveNi_intersector.h" -#include "../geometry/curveNi_mb_intersector.h" -#include "../geometry/linei_intersector.h" -#include "../geometry/subdivpatch1_intersector.h" -#include "../geometry/object_intersector.h" -#include "../geometry/instance_intersector.h" -#include "../geometry/subgrid_intersector.h" -#include "../geometry/subgrid_mb_intersector.h" -#include "../geometry/curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - template<int N, int types, bool robust, typename PrimitiveIntersector1> - void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This, - RayHit& __restrict__ ray, - IntersectContext* __restrict__ context) - { - const BVH* __restrict__ bvh = (const BVH*)This->ptr; - - /* we may traverse an empty BVH in case all geometry was invalid */ - if (bvh->root == BVH::emptyNode) - return; - - /* perform per ray precalculations required by the primitive intersector */ - Precalculations pre(ray, bvh); - - /* stack state */ - StackItemT<NodeRef> stack[stackSize]; // stack of nodes - StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer - StackItemT<NodeRef>* stackEnd = stack+stackSize; - stack[0].ptr = bvh->root; - stack[0].dist = neg_inf; - - if (bvh->root == BVH::emptyNode) - return; - - /* filter out invalid rays */ -#if defined(EMBREE_IGNORE_INVALID_RAYS) - if (!ray.valid()) return; -#endif - /* verify correct input */ - assert(ray.valid()); - assert(ray.tnear() >= 0.0f); - assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f)); - - /* load the ray into SIMD registers */ - TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); - - /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser; - - /* pop loop */ - while (true) pop: - { - /* pop next node */ - if (unlikely(stackPtr == stack)) break; - stackPtr--; - NodeRef cur = NodeRef(stackPtr->ptr); - - /* if popped node is too far, pop next one */ -#if defined(__AVX512ER__) - /* much faster on KNL */ - if (unlikely(any(vfloat<Nx>(*(float*)&stackPtr->dist) > tray.tfar))) - continue; -#else - if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) - continue; -#endif - - /* downtraversal loop */ - while (true) - { - /* intersect node */ - size_t mask; vfloat<Nx> tNear; - STAT3(normal.trav_nodes,1,1,1); - bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); - if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; } - - /* if no child is hit, pop next node */ - if (unlikely(mask == 0)) - goto pop; - - /* select next child and push other children */ - nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd); - } - - /* this is a leaf node */ - assert(cur != BVH::emptyNode); - STAT3(normal.trav_leaves,1,1,1); - size_t num; Primitive* prim = (Primitive*)cur.leaf(num); - size_t lazy_node = 0; - PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node); - tray.tfar = ray.tfar; - - /* push lazy node onto stack */ - if (unlikely(lazy_node)) { - stackPtr->ptr = lazy_node; - stackPtr->dist = neg_inf; - stackPtr++; - } - } - } - - template<int N, int types, bool robust, typename PrimitiveIntersector1> - void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This, - Ray& __restrict__ ray, - IntersectContext* __restrict__ context) - { - const BVH* __restrict__ bvh = (const BVH*)This->ptr; - - /* we may traverse an empty BVH in case all geometry was invalid */ - if (bvh->root == BVH::emptyNode) - return; - - /* early out for already occluded rays */ - if (unlikely(ray.tfar < 0.0f)) - return; - - /* perform per ray precalculations required by the primitive intersector */ - Precalculations pre(ray, bvh); - - /* stack state */ - NodeRef stack[stackSize]; // stack of nodes that still need to get traversed - NodeRef* stackPtr = stack+1; // current stack pointer - NodeRef* stackEnd = stack+stackSize; - stack[0] = bvh->root; - - /* filter out invalid rays */ -#if defined(EMBREE_IGNORE_INVALID_RAYS) - if (!ray.valid()) return; -#endif - - /* verify correct input */ - assert(ray.valid()); - assert(ray.tnear() >= 0.0f); - assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f)); - - /* load the ray into SIMD registers */ - TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); - - /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser; - - /* pop loop */ - while (true) pop: - { - /* pop next node */ - if (unlikely(stackPtr == stack)) break; - stackPtr--; - NodeRef cur = (NodeRef)*stackPtr; - - /* downtraversal loop */ - while (true) - { - /* intersect node */ - size_t mask; vfloat<Nx> tNear; - STAT3(shadow.trav_nodes,1,1,1); - bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); - if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; } - - /* if no child is hit, pop next node */ - if (unlikely(mask == 0)) - goto pop; - - /* select next child and push other children */ - nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd); - } - - /* this is a leaf node */ - assert(cur != BVH::emptyNode); - STAT3(shadow.trav_leaves,1,1,1); - size_t num; Primitive* prim = (Primitive*)cur.leaf(num); - size_t lazy_node = 0; - if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) { - ray.tfar = neg_inf; - break; - } - - /* push lazy node onto stack */ - if (unlikely(lazy_node)) { - *stackPtr = (NodeRef)lazy_node; - stackPtr++; - } - } - } - - template<int N, int types, bool robust, typename PrimitiveIntersector1> - struct PointQueryDispatch - { - typedef typename PrimitiveIntersector1::Precalculations Precalculations; - typedef typename PrimitiveIntersector1::Primitive Primitive; - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::AABBNodeMB4D AABBNodeMB4D; - - static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store - - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N; - - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) - { - const BVH* __restrict__ bvh = (const BVH*)This->ptr; - - /* we may traverse an empty BVH in case all geometry was invalid */ - if (bvh->root == BVH::emptyNode) - return false; - - /* stack state */ - StackItemT<NodeRef> stack[stackSize]; // stack of nodes - StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer - StackItemT<NodeRef>* stackEnd = stack+stackSize; - stack[0].ptr = bvh->root; - stack[0].dist = neg_inf; - - /* verify correct input */ - assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f)); - - /* load the point query into SIMD registers */ - TravPointQuery<N> tquery(query->p, context->query_radius); - - /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, N, types> nodeTraverser; - - bool changed = false; - float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE - ? query->radius * query->radius - : dot(context->query_radius, context->query_radius); - - /* pop loop */ - while (true) pop: - { - /* pop next node */ - if (unlikely(stackPtr == stack)) break; - stackPtr--; - NodeRef cur = NodeRef(stackPtr->ptr); - - /* if popped node is too far, pop next one */ - if (unlikely(*(float*)&stackPtr->dist > cull_radius)) - continue; - - /* downtraversal loop */ - while (true) - { - /* intersect node */ - size_t mask; vfloat<N> tNear; - STAT3(point_query.trav_nodes,1,1,1); - bool nodeIntersected; - if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) { - nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask); - } else { - nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask); - } - if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; } - - /* if no child is hit, pop next node */ - if (unlikely(mask == 0)) - goto pop; - - /* select next child and push other children */ - nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd); - } - - /* this is a leaf node */ - assert(cur != BVH::emptyNode); - STAT3(point_query.trav_leaves,1,1,1); - size_t num; Primitive* prim = (Primitive*)cur.leaf(num); - size_t lazy_node = 0; - if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node)) - { - changed = true; - tquery.rad = context->query_radius; - cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE - ? query->radius * query->radius - : dot(context->query_radius, context->query_radius); - } - - /* push lazy node onto stack */ - if (unlikely(lazy_node)) { - stackPtr->ptr = lazy_node; - stackPtr->dist = neg_inf; - stackPtr++; - } - } - return changed; - } - }; - - /* disable point queries for not yet supported geometry types */ - template<int N, int types, bool robust> - struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1> { - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; } - }; - - template<int N, int types, bool robust> - struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1> { - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; } - }; - - template<int N, int types, bool robust> - struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1> { - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; } - }; - - template<int N, int types, bool robust, typename PrimitiveIntersector1> - bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery( - const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) - { - return PointQueryDispatch<N, types, robust, PrimitiveIntersector1>::pointQuery(This, query, context); - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h deleted file mode 100644 index 1a269c319a..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "../common/ray.h" -#include "../common/point_query.h" - -namespace embree -{ - namespace isa - { - /*! BVH single ray intersector. */ - template<int N, int types, bool robust, typename PrimitiveIntersector1> - class BVHNIntersector1 - { - /* shortcuts for frequently used types */ - typedef typename PrimitiveIntersector1::Precalculations Precalculations; - typedef typename PrimitiveIntersector1::Primitive Primitive; - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::AABBNodeMB4D AABBNodeMB4D; - - static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store - - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N; - - public: - static void intersect (const Accel::Intersectors* This, RayHit& ray, IntersectContext* context); - static void occluded (const Accel::Intersectors* This, Ray& ray, IntersectContext* context); - static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context); - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp deleted file mode 100644 index 989f7354fd..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_intersector1.cpp" - -namespace embree -{ - namespace isa - { - int getISA() { - return VerifyMultiTargetLinking::getISA(); - } - - //////////////////////////////////////////////////////////////////////////////// - /// BVH4Intersector1 Definitions - //////////////////////////////////////////////////////////////////////////////// - - IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >)); - IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >)); - - IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >)); - IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >)); - - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >)); - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >)); - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >)); - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >)); - - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >)); - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >)); - - IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>)); - IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>)); - - IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >)); - IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >)); - - IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >)); - IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >)); - - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >)); - - IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >)); - IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >)); - - IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >)); - //IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >)); - - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h deleted file mode 100644 index d764cc928d..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "../common/ray.h" -#include "../common/stack_item.h" -#include "node_intersector_frustum.h" - -namespace embree -{ - namespace isa - { - template<int K, bool robust> - struct TravRayK; - - /*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional). */ - template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true> - class BVHNIntersectorKHybrid - { - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = types == BVH_AN1 ? vextend<N>::size : N; - - /* shortcuts for frequently used types */ - typedef typename PrimitiveIntersectorK::Precalculations Precalculations; - typedef typename PrimitiveIntersectorK::Primitive Primitive; - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::BaseNode BaseNode; - typedef typename BVH::AABBNode AABBNode; - - static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store - static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth; - - static const size_t switchThresholdIncoherent = \ - (K==4) ? 3 : - (K==8) ? ((N==4) ? 5 : 7) : - (K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal - 0; - - private: - static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre, - RayHitK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context); - static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre, - RayK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context); - - public: - static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context); - static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context); - - static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context); - static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context); - - }; - - /*! BVH packet intersector. */ - template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK> - class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {}; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h deleted file mode 100644 index 83d1fb4d3d..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "node_intersector_packet_stream.h" -#include "node_intersector_frustum.h" -#include "bvh_traverser_stream.h" - -namespace embree -{ - namespace isa - { - /*! BVH ray stream intersector. */ - template<int N, int Nx, int types, bool robust, typename PrimitiveIntersector> - class BVHNIntersectorStream - { - static const int Nxd = (Nx == N) ? N : Nx/2; - - /* shortcuts for frequently used types */ - template<int K> using PrimitiveIntersectorK = typename PrimitiveIntersector::template Type<K>; - template<int K> using PrimitiveK = typename PrimitiveIntersectorK<K>::PrimitiveK; - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::BaseNode BaseNode; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::AABBNodeMB AABBNodeMB; - - template<int K> - __forceinline static size_t initPacketsAndFrustum(RayK<K>** inputPackets, size_t numOctantRays, - TravRayKStream<K, robust>* packets, Frustum<robust>& frustum, bool& commonOctant) - { - const size_t numPackets = (numOctantRays+K-1)/K; - - Vec3vf<K> tmp_min_rdir(pos_inf); - Vec3vf<K> tmp_max_rdir(neg_inf); - Vec3vf<K> tmp_min_org(pos_inf); - Vec3vf<K> tmp_max_org(neg_inf); - vfloat<K> tmp_min_dist(pos_inf); - vfloat<K> tmp_max_dist(neg_inf); - - size_t m_active = 0; - for (size_t i = 0; i < numPackets; i++) - { - const vfloat<K> tnear = inputPackets[i]->tnear(); - const vfloat<K> tfar = inputPackets[i]->tfar; - vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f); - -#if defined(EMBREE_IGNORE_INVALID_RAYS) - m_valid &= inputPackets[i]->valid(); -#endif - - m_active |= (size_t)movemask(m_valid) << (i*K); - - vfloat<K> packet_min_dist = max(tnear, 0.0f); - vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf); - tmp_min_dist = min(tmp_min_dist, packet_min_dist); - tmp_max_dist = max(tmp_max_dist, packet_max_dist); - - const Vec3vf<K>& org = inputPackets[i]->org; - const Vec3vf<K>& dir = inputPackets[i]->dir; - - new (&packets[i]) TravRayKStream<K, robust>(org, dir, packet_min_dist, packet_max_dist); - - tmp_min_rdir = min(tmp_min_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(pos_inf))); - tmp_max_rdir = max(tmp_max_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(neg_inf))); - tmp_min_org = min(tmp_min_org , select(m_valid,org , Vec3vf<K>(pos_inf))); - tmp_max_org = max(tmp_max_org , select(m_valid,org , Vec3vf<K>(neg_inf))); - } - - m_active &= (numOctantRays == (8 * sizeof(size_t))) ? (size_t)-1 : (((size_t)1 << numOctantRays)-1); - - - const Vec3fa reduced_min_rdir(reduce_min(tmp_min_rdir.x), - reduce_min(tmp_min_rdir.y), - reduce_min(tmp_min_rdir.z)); - - const Vec3fa reduced_max_rdir(reduce_max(tmp_max_rdir.x), - reduce_max(tmp_max_rdir.y), - reduce_max(tmp_max_rdir.z)); - - const Vec3fa reduced_min_origin(reduce_min(tmp_min_org.x), - reduce_min(tmp_min_org.y), - reduce_min(tmp_min_org.z)); - - const Vec3fa reduced_max_origin(reduce_max(tmp_max_org.x), - reduce_max(tmp_max_org.y), - reduce_max(tmp_max_org.z)); - - commonOctant = - (reduced_max_rdir.x < 0.0f || reduced_min_rdir.x >= 0.0f) && - (reduced_max_rdir.y < 0.0f || reduced_min_rdir.y >= 0.0f) && - (reduced_max_rdir.z < 0.0f || reduced_min_rdir.z >= 0.0f); - - const float frustum_min_dist = reduce_min(tmp_min_dist); - const float frustum_max_dist = reduce_max(tmp_max_dist); - - frustum.init(reduced_min_origin, reduced_max_origin, - reduced_min_rdir, reduced_max_rdir, - frustum_min_dist, frustum_max_dist, - N); - - return m_active; - } - - template<int K> - __forceinline static size_t intersectAABBNodePacket(size_t m_active, - const TravRayKStream<K,robust>* packets, - const AABBNode* __restrict__ node, - size_t boxID, - const NearFarPrecalculations& nf) - { - assert(m_active); - const size_t startPacketID = bsf(m_active) / K; - const size_t endPacketID = bsr(m_active) / K; - size_t m_trav_active = 0; - for (size_t i = startPacketID; i <= endPacketID; i++) - { - const size_t m_hit = intersectNodeK<N>(node, boxID, packets[i], nf); - m_trav_active |= m_hit << (i*K); - } - return m_trav_active; - } - - template<int K> - __forceinline static size_t traverseCoherentStream(size_t m_active, - TravRayKStream<K, robust>* packets, - const AABBNode* __restrict__ node, - const Frustum<robust>& frustum, - size_t* maskK, - vfloat<Nx>& dist) - { - size_t m_node_hit = intersectNodeFrustum<N,Nx>(node, frustum, dist); - const size_t first_index = bsf(m_active); - const size_t first_packetID = first_index / K; - const size_t first_rayID = first_index % K; - size_t m_first_hit = intersectNode1<N,Nx>(node, packets[first_packetID], first_rayID, frustum.nf); - - /* this make traversal independent of the ordering of rays */ - size_t m_node = m_node_hit ^ m_first_hit; - while (unlikely(m_node)) - { - const size_t boxID = bscf(m_node); - const size_t m_current = m_active & intersectAABBNodePacket(m_active, packets, node, boxID, frustum.nf); - m_node_hit ^= m_current ? (size_t)0 : ((size_t)1 << boxID); - maskK[boxID] = m_current; - } - return m_node_hit; - } - - // TODO: explicit 16-wide path for KNL - template<int K> - __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active, - TravRayKStreamFast<K>* __restrict__ packets, - const AABBNode* __restrict__ node, - const NearFarPrecalculations& nf, - const int shiftTable[32]) - { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - assert(m_active); - vint<Nx> vmask(zero); - do - { - STAT3(shadow.trav_nodes,1,1,1); - const size_t rayID = bscf(m_active); - assert(rayID < MAX_INTERNAL_STREAM_SIZE); - TravRayKStream<K,robust> &p = packets[rayID / K]; - const size_t i = rayID % K; - const vint<Nx> bitmask(shiftTable[rayID]); - -#if defined (__aarch64__) - const vfloat<Nx> tNearX = madd(bminX, p.rdir.x[i], p.neg_org_rdir.x[i]); - const vfloat<Nx> tNearY = madd(bminY, p.rdir.y[i], p.neg_org_rdir.y[i]); - const vfloat<Nx> tNearZ = madd(bminZ, p.rdir.z[i], p.neg_org_rdir.z[i]); - const vfloat<Nx> tFarX = madd(bmaxX, p.rdir.x[i], p.neg_org_rdir.x[i]); - const vfloat<Nx> tFarY = madd(bmaxY, p.rdir.y[i], p.neg_org_rdir.y[i]); - const vfloat<Nx> tFarZ = madd(bmaxZ, p.rdir.z[i], p.neg_org_rdir.z[i]); -#else - const vfloat<Nx> tNearX = msub(bminX, p.rdir.x[i], p.org_rdir.x[i]); - const vfloat<Nx> tNearY = msub(bminY, p.rdir.y[i], p.org_rdir.y[i]); - const vfloat<Nx> tNearZ = msub(bminZ, p.rdir.z[i], p.org_rdir.z[i]); - const vfloat<Nx> tFarX = msub(bmaxX, p.rdir.x[i], p.org_rdir.x[i]); - const vfloat<Nx> tFarY = msub(bmaxY, p.rdir.y[i], p.org_rdir.y[i]); - const vfloat<Nx> tFarZ = msub(bmaxZ, p.rdir.z[i], p.org_rdir.z[i]); -#endif - - const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i])); - const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i])); - -#if defined(__AVX512ER__) - const vboolx m_node((1 << N)-1); - const vbool<Nx> hit_mask = le(m_node, tNear, tFar); - vmask = mask_or(hit_mask, vmask, vmask, bitmask); -#else - const vbool<Nx> hit_mask = tNear <= tFar; -#if defined(__AVX2__) - vmask = vmask | (bitmask & vint<Nx>(hit_mask)); -#else - vmask = select(hit_mask, vmask | bitmask, vmask); -#endif -#endif - } while(m_active); - return vmask; - } - - template<int K> - __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active, - TravRayKStreamRobust<K>* __restrict__ packets, - const AABBNode* __restrict__ node, - const NearFarPrecalculations& nf, - const int shiftTable[32]) - { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - assert(m_active); - vint<Nx> vmask(zero); - do - { - STAT3(shadow.trav_nodes,1,1,1); - const size_t rayID = bscf(m_active); - assert(rayID < MAX_INTERNAL_STREAM_SIZE); - TravRayKStream<K,robust> &p = packets[rayID / K]; - const size_t i = rayID % K; - const vint<Nx> bitmask(shiftTable[rayID]); - const vfloat<Nx> tNearX = (bminX - p.org.x[i]) * p.rdir.x[i]; - const vfloat<Nx> tNearY = (bminY - p.org.y[i]) * p.rdir.y[i]; - const vfloat<Nx> tNearZ = (bminZ - p.org.z[i]) * p.rdir.z[i]; - const vfloat<Nx> tFarX = (bmaxX - p.org.x[i]) * p.rdir.x[i]; - const vfloat<Nx> tFarY = (bmaxY - p.org.y[i]) * p.rdir.y[i]; - const vfloat<Nx> tFarZ = (bmaxZ - p.org.z[i]) * p.rdir.z[i]; - const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i])); - const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i])); - const float round_down = 1.0f-2.0f*float(ulp); - const float round_up = 1.0f+2.0f*float(ulp); -#if defined(__AVX512ER__) - const vboolx m_node((1 << N)-1); - const vbool<Nx> hit_mask = le(m_node, round_down*tNear, round_up*tFar); - vmask = mask_or(hit_mask, vmask, vmask, bitmask); -#else - const vbool<Nx> hit_mask = round_down*tNear <= round_up*tFar; -#if defined(__AVX2__) - vmask = vmask | (bitmask & vint<Nx>(hit_mask)); -#else - vmask = select(hit_mask, vmask | bitmask, vmask); -#endif -#endif - } while(m_active); - return vmask; - } - - - static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth; - - public: - static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context); - static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context); - - private: - template<int K> - static void intersectCoherent(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context); - - template<int K> - static void occludedCoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context); - - template<int K> - static void occludedIncoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context); - }; - - - /*! BVH ray stream intersector with direct fallback to packets. */ - template<int N, int Nx> - class BVHNIntersectorStreamPacketFallback - { - public: - static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context); - static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context); - - private: - template<int K> - static void intersectK(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context); - - template<int K> - static void occludedK(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context); - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h deleted file mode 100644 index cdeb923637..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/default.h" -#include "../common/ray.h" -#include "../common/scene.h" - -namespace embree -{ - namespace isa - { - class RayStreamFilter - { - public: - static void intersectAOS(Scene* scene, RTCRayHit* rays, size_t N, size_t stride, IntersectContext* context); - static void intersectAOP(Scene* scene, RTCRayHit** rays, size_t N, IntersectContext* context); - static void intersectSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context); - static void intersectSOP(Scene* scene, const RTCRayHitNp* rays, size_t N, IntersectContext* context); - - static void occludedAOS(Scene* scene, RTCRay* rays, size_t N, size_t stride, IntersectContext* context); - static void occludedAOP(Scene* scene, RTCRay** rays, size_t N, IntersectContext* context); - static void occludedSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context); - static void occludedSOP(Scene* scene, const RTCRayNp* rays, size_t N, IntersectContext* context); - - private: - template<int K, bool intersect> - static void filterAOS(Scene* scene, void* rays, size_t N, size_t stride, IntersectContext* context); - - template<int K, bool intersect> - static void filterAOP(Scene* scene, void** rays, size_t N, IntersectContext* context); - - template<int K, bool intersect> - static void filterSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context); - - template<int K, bool intersect> - static void filterSOP(Scene* scene, const void* rays, size_t N, IntersectContext* context); - }; - } -}; diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h deleted file mode 100644 index baa4a8d805..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_base.h" - -namespace embree -{ - /*! BVHN AABBNode */ - template<typename NodeRef, int N> - struct AABBNode_t : public BaseNode_t<NodeRef, N> - { - using BaseNode_t<NodeRef,N>::children; - - struct Create - { - __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const - { - AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - }; - - struct Set - { - __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const { - node.getAABBNode()->setRef(i,child); - node.getAABBNode()->setBounds(i,bounds); - } - }; - - struct Create2 - { - template<typename BuildRecord> - __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const - { - AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear(); - for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds()); - return NodeRef::encodeNode(node); - } - }; - - struct Set2 - { - template<typename BuildRecord> - __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const - { - AABBNode_t* node = ref.getAABBNode(); - for (size_t i=0; i<num; i++) node->setRef(i,children[i]); - return ref; - } - }; - - struct Set3 - { - Set3 (FastAllocator* allocator, PrimRef* prims) - : allocator(allocator), prims(prims) {} - - template<typename BuildRecord> - __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const - { - AABBNode_t* node = ref.getAABBNode(); - for (size_t i=0; i<num; i++) node->setRef(i,children[i]); - - if (unlikely(precord.alloc_barrier)) - { - PrimRef* begin = &prims[precord.prims.begin()]; - PrimRef* end = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!! - size_t bytes = (size_t)end - (size_t)begin; - allocator->addBlock(begin,bytes); - } - - return ref; - } - - FastAllocator* const allocator; - PrimRef* const prims; - }; - - /*! Clears the node. */ - __forceinline void clear() { - lower_x = lower_y = lower_z = pos_inf; - upper_x = upper_y = upper_z = neg_inf; - BaseNode_t<NodeRef,N>::clear(); - } - - /*! Sets bounding box and ID of child. */ - __forceinline void setRef(size_t i, const NodeRef& ref) { - assert(i < N); - children[i] = ref; - } - - /*! Sets bounding box of child. */ - __forceinline void setBounds(size_t i, const BBox3fa& bounds) - { - assert(i < N); - lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z; - upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z; - } - - /*! Sets bounding box and ID of child. */ - __forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) { - setBounds(i,bounds); - children[i] = ref; - } - - /*! Returns bounds of node. */ - __forceinline BBox3fa bounds() const { - const Vec3fa lower(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z)); - const Vec3fa upper(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z)); - return BBox3fa(lower,upper); - } - - /*! Returns bounds of specified child. */ - __forceinline BBox3fa bounds(size_t i) const - { - assert(i < N); - const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]); - const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]); - return BBox3fa(lower,upper); - } - - /*! Returns extent of bounds of specified child. */ - __forceinline Vec3fa extend(size_t i) const { - return bounds(i).size(); - } - - /*! Returns bounds of all children (implemented later as specializations) */ - __forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const; - - /*! swap two children of the node */ - __forceinline void swap(size_t i, size_t j) - { - assert(i<N && j<N); - std::swap(children[i],children[j]); - std::swap(lower_x[i],lower_x[j]); - std::swap(lower_y[i],lower_y[j]); - std::swap(lower_z[i],lower_z[j]); - std::swap(upper_x[i],upper_x[j]); - std::swap(upper_y[i],upper_y[j]); - std::swap(upper_z[i],upper_z[j]); - } - - /*! swap the children of two nodes */ - __forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j) - { - assert(i<N && j<N); - std::swap(a->children[i],b->children[j]); - std::swap(a->lower_x[i],b->lower_x[j]); - std::swap(a->lower_y[i],b->lower_y[j]); - std::swap(a->lower_z[i],b->lower_z[j]); - std::swap(a->upper_x[i],b->upper_x[j]); - std::swap(a->upper_y[i],b->upper_y[j]); - std::swap(a->upper_z[i],b->upper_z[j]); - } - - /*! compacts a node (moves empty children to the end) */ - __forceinline static void compact(AABBNode_t* a) - { - /* find right most filled node */ - ssize_t j=N; - for (j=j-1; j>=0; j--) - if (a->child(j) != NodeRef::emptyNode) - break; - - /* replace empty nodes with filled nodes */ - for (ssize_t i=0; i<j; i++) { - if (a->child(i) == NodeRef::emptyNode) { - a->swap(i,j); - for (j=j-1; j>i; j--) - if (a->child(j) != NodeRef::emptyNode) - break; - } - } - } - - /*! Returns reference to specified child */ - __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; } - __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; } - - /*! output operator */ - friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n) - { - o << "AABBNode { " << embree_endl; - o << " lower_x " << n.lower_x << embree_endl; - o << " upper_x " << n.upper_x << embree_endl; - o << " lower_y " << n.lower_y << embree_endl; - o << " upper_y " << n.upper_y << embree_endl; - o << " lower_z " << n.lower_z << embree_endl; - o << " upper_z " << n.upper_z << embree_endl; - o << " children = "; - for (size_t i=0; i<N; i++) o << n.children[i] << " "; - o << embree_endl; - o << "}" << embree_endl; - return o; - } - - public: - vfloat<N> lower_x; //!< X dimension of lower bounds of all N children. - vfloat<N> upper_x; //!< X dimension of upper bounds of all N children. - vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children. - vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children. - vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children. - vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children. - }; - - template<> - __forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const { - transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower); - transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper); - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h deleted file mode 100644 index 501f4bce5b..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_base.h" - -namespace embree -{ - /*! Motion Blur AABBNode */ - template<typename NodeRef, int N> - struct AABBNodeMB_t : public BaseNode_t<NodeRef, N> - { - using BaseNode_t<NodeRef,N>::children; - typedef BVHNodeRecord<NodeRef> NodeRecord; - typedef BVHNodeRecordMB<NodeRef> NodeRecordMB; - typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; - - struct Create - { - template<typename BuildRecord> - __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const - { - AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - }; - - struct Set - { - template<typename BuildRecord> - __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const - { - AABBNodeMB_t* node = ref.getAABBNodeMB(); - - LBBox3fa bounds = empty; - for (size_t i=0; i<num; i++) { - node->setRef(i,children[i].ref); - node->setBounds(i,children[i].lbounds); - bounds.extend(children[i].lbounds); - } - return NodeRecordMB(ref,bounds); - } - }; - - struct SetTimeRange - { - __forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {} - - template<typename BuildRecord> - __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const - { - AABBNodeMB_t* node = ref.getAABBNodeMB(); - - LBBox3fa bounds = empty; - for (size_t i=0; i<num; i++) { - node->setRef(i, children[i].ref); - node->setBounds(i, children[i].lbounds, tbounds); - bounds.extend(children[i].lbounds); - } - return NodeRecordMB(ref,bounds); - } - - BBox1f tbounds; - }; - - /*! Clears the node. */ - __forceinline void clear() { - lower_x = lower_y = lower_z = vfloat<N>(pos_inf); - upper_x = upper_y = upper_z = vfloat<N>(neg_inf); - lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f); - upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f); - BaseNode_t<NodeRef,N>::clear(); - } - - /*! Sets ID of child. */ - __forceinline void setRef(size_t i, NodeRef ref) { - children[i] = ref; - } - - /*! Sets bounding box of child. */ - __forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i) - { - /*! for empty bounds we have to avoid inf-inf=nan */ - BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX))); - BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX))); - bounds0 = bounds0.enlarge_by(4.0f*float(ulp)); - bounds1 = bounds1.enlarge_by(4.0f*float(ulp)); - Vec3fa dlower = bounds1.lower-bounds0.lower; - Vec3fa dupper = bounds1.upper-bounds0.upper; - - lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z; - upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z; - - lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z; - upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z; - } - - /*! Sets bounding box of child. */ - __forceinline void setBounds(size_t i, const LBBox3fa& bounds) { - setBounds(i, bounds.bounds0, bounds.bounds1); - } - - /*! Sets bounding box of child. */ - __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) { - setBounds(i, bounds.global(tbounds)); - } - - /*! Sets bounding box and ID of child. */ - __forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) { - lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z; - upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z; - children[i] = ref; - } - - /*! Sets bounding box and ID of child. */ - __forceinline void set(size_t i, const NodeRecordMB4D& child) - { - setRef(i, child.ref); - setBounds(i, child.lbounds, child.dt); - } - - /*! Return bounding box for time 0 */ - __forceinline BBox3fa bounds0(size_t i) const { - return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]), - Vec3fa(upper_x[i],upper_y[i],upper_z[i])); - } - - /*! Return bounding box for time 1 */ - __forceinline BBox3fa bounds1(size_t i) const { - return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]), - Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i])); - } - - /*! Returns bounds of node. */ - __forceinline BBox3fa bounds() const { - return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)), - reduce_min(min(lower_y,lower_y+lower_dy)), - reduce_min(min(lower_z,lower_z+lower_dz))), - Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)), - reduce_max(max(upper_y,upper_y+upper_dy)), - reduce_max(max(upper_z,upper_z+upper_dz)))); - } - - /*! Return bounding box of child i */ - __forceinline BBox3fa bounds(size_t i) const { - return merge(bounds0(i),bounds1(i)); - } - - /*! Return linear bounding box of child i */ - __forceinline LBBox3fa lbounds(size_t i) const { - return LBBox3fa(bounds0(i),bounds1(i)); - } - - /*! Return bounding box of child i at specified time */ - __forceinline BBox3fa bounds(size_t i, float time) const { - return lerp(bounds0(i),bounds1(i),time); - } - - /*! Returns the expected surface area when randomly sampling the time. */ - __forceinline float expectedHalfArea(size_t i) const { - return lbounds(i).expectedHalfArea(); - } - - /*! Returns the expected surface area when randomly sampling the time. */ - __forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const { - return lbounds(i).expectedHalfArea(t0t1); - } - - /*! swap two children of the node */ - __forceinline void swap(size_t i, size_t j) - { - assert(i<N && j<N); - std::swap(children[i],children[j]); - - std::swap(lower_x[i],lower_x[j]); - std::swap(upper_x[i],upper_x[j]); - std::swap(lower_y[i],lower_y[j]); - std::swap(upper_y[i],upper_y[j]); - std::swap(lower_z[i],lower_z[j]); - std::swap(upper_z[i],upper_z[j]); - - std::swap(lower_dx[i],lower_dx[j]); - std::swap(upper_dx[i],upper_dx[j]); - std::swap(lower_dy[i],lower_dy[j]); - std::swap(upper_dy[i],upper_dy[j]); - std::swap(lower_dz[i],lower_dz[j]); - std::swap(upper_dz[i],upper_dz[j]); - } - - /*! compacts a node (moves empty children to the end) */ - __forceinline static void compact(AABBNodeMB_t* a) - { - /* find right most filled node */ - ssize_t j=N; - for (j=j-1; j>=0; j--) - if (a->child(j) != NodeRef::emptyNode) - break; - - /* replace empty nodes with filled nodes */ - for (ssize_t i=0; i<j; i++) { - if (a->child(i) == NodeRef::emptyNode) { - a->swap(i,j); - for (j=j-1; j>i; j--) - if (a->child(j) != NodeRef::emptyNode) - break; - } - } - } - - /*! Returns reference to specified child */ - __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; } - __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; } - - /*! stream output operator */ - friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n) - { - cout << "AABBNodeMB {" << embree_endl; - for (size_t i=0; i<N; i++) - { - const BBox3fa b0 = n.bounds0(i); - const BBox3fa b1 = n.bounds1(i); - cout << " child" << i << " { " << embree_endl; - cout << " bounds0 = " << b0 << ", " << embree_endl; - cout << " bounds1 = " << b1 << ", " << embree_endl; - cout << " }"; - } - cout << "}"; - return cout; - } - - public: - vfloat<N> lower_x; //!< X dimension of lower bounds of all N children. - vfloat<N> upper_x; //!< X dimension of upper bounds of all N children. - vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children. - vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children. - vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children. - vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children. - - vfloat<N> lower_dx; //!< X dimension of lower bounds of all N children. - vfloat<N> upper_dx; //!< X dimension of upper bounds of all N children. - vfloat<N> lower_dy; //!< Y dimension of lower bounds of all N children. - vfloat<N> upper_dy; //!< Y dimension of upper bounds of all N children. - vfloat<N> lower_dz; //!< Z dimension of lower bounds of all N children. - vfloat<N> upper_dz; //!< Z dimension of upper bounds of all N children. - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h deleted file mode 100644 index e968bbbc39..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_aabb_mb.h" - -namespace embree -{ - /*! Aligned 4D Motion Blur Node */ - template<typename NodeRef, int N> - struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N> - { - using BaseNode_t<NodeRef,N>::children; - using AABBNodeMB_t<NodeRef,N>::set; - - typedef BVHNodeRecord<NodeRef> NodeRecord; - typedef BVHNodeRecordMB<NodeRef> NodeRecordMB; - typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; - - struct Create - { - template<typename BuildRecord> - __forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const - { - if (hasTimeSplits) - { - AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - else - { - AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - } - }; - - struct Set - { - template<typename BuildRecord> - __forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const - { - if (likely(ref.isAABBNodeMB())) { - for (size_t i=0; i<num; i++) - ref.getAABBNodeMB()->set(i, children[i]); - } else { - for (size_t i=0; i<num; i++) - ref.getAABBNodeMB4D()->set(i, children[i]); - } - } - }; - - /*! Clears the node. */ - __forceinline void clear() { - lower_t = vfloat<N>(pos_inf); - upper_t = vfloat<N>(neg_inf); - AABBNodeMB_t<NodeRef,N>::clear(); - } - - /*! Sets bounding box of child. */ - __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) - { - AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds)); - lower_t[i] = tbounds.lower; - upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper; - } - - /*! Sets bounding box and ID of child. */ - __forceinline void set(size_t i, const NodeRecordMB4D& child) { - AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref); - setBounds(i, child.lbounds, child.dt); - } - - /*! Returns the expected surface area when randomly sampling the time. */ - __forceinline float expectedHalfArea(size_t i) const { - return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i)); - } - - /*! returns time range for specified child */ - __forceinline BBox1f timeRange(size_t i) const { - return BBox1f(lower_t[i],upper_t[i]); - } - - /*! stream output operator */ - friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n) - { - cout << "AABBNodeMB4D {" << embree_endl; - for (size_t i=0; i<N; i++) - { - const BBox3fa b0 = n.bounds0(i); - const BBox3fa b1 = n.bounds1(i); - cout << " child" << i << " { " << embree_endl; - cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl; - cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl; - cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl; - cout << " }"; - } - cout << "}"; - return cout; - } - - public: - vfloat<N> lower_t; //!< time dimension of lower bounds of all N children - vfloat<N> upper_t; //!< time dimension of upper bounds of all N children - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h deleted file mode 100644 index 8268f3b932..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_ref.h" - -namespace embree -{ - - /*! BVHN Base Node */ - template<typename NodeRef, int N> - struct BaseNode_t - { - /*! Clears the node. */ - __forceinline void clear() - { - for (size_t i=0; i<N; i++) - children[i] = NodeRef::emptyNode; - } - - /*! Returns reference to specified child */ - __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; } - __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; } - - /*! verifies the node */ - __forceinline bool verify() const - { - for (size_t i=0; i<N; i++) { - if (child(i) == NodeRef::emptyNode) { - for (; i<N; i++) { - if (child(i) != NodeRef::emptyNode) - return false; - } - break; - } - } - return true; - } - - NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf) - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h deleted file mode 100644 index fa7cc08211..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_base.h" - -namespace embree -{ - /*! Node with unaligned bounds */ - template<typename NodeRef, int N> - struct OBBNode_t : public BaseNode_t<NodeRef, N> - { - using BaseNode_t<NodeRef,N>::children; - - struct Create - { - __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const - { - OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - }; - - struct Set - { - __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const { - node.ungetAABBNode()->setRef(i,child); - node.ungetAABBNode()->setBounds(i,bounds); - } - }; - - /*! Clears the node. */ - __forceinline void clear() - { - naabb.l.vx = Vec3fa(nan); - naabb.l.vy = Vec3fa(nan); - naabb.l.vz = Vec3fa(nan); - naabb.p = Vec3fa(nan); - BaseNode_t<NodeRef,N>::clear(); - } - - /*! Sets bounding box. */ - __forceinline void setBounds(size_t i, const OBBox3fa& b) - { - assert(i < N); - - AffineSpace3fa space = b.space; - space.p -= b.bounds.lower; - space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space; - - naabb.l.vx.x[i] = space.l.vx.x; - naabb.l.vx.y[i] = space.l.vx.y; - naabb.l.vx.z[i] = space.l.vx.z; - - naabb.l.vy.x[i] = space.l.vy.x; - naabb.l.vy.y[i] = space.l.vy.y; - naabb.l.vy.z[i] = space.l.vy.z; - - naabb.l.vz.x[i] = space.l.vz.x; - naabb.l.vz.y[i] = space.l.vz.y; - naabb.l.vz.z[i] = space.l.vz.z; - - naabb.p.x[i] = space.p.x; - naabb.p.y[i] = space.p.y; - naabb.p.z[i] = space.p.z; - } - - /*! Sets ID of child. */ - __forceinline void setRef(size_t i, const NodeRef& ref) { - assert(i < N); - children[i] = ref; - } - - /*! Returns the extent of the bounds of the ith child */ - __forceinline Vec3fa extent(size_t i) const { - assert(i<N); - const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]); - const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]); - const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]); - return rsqrt(vx*vx + vy*vy + vz*vz); - } - - /*! Returns reference to specified child */ - __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; } - __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; } - - /*! output operator */ - friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n) - { - o << "UnAABBNode { " << n.naabb << " } " << embree_endl; - return o; - } - - public: - AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space) - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h deleted file mode 100644 index 834cf5ec28..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_base.h" - -namespace embree -{ - template<typename NodeRef, int N> - struct OBBNodeMB_t : public BaseNode_t<NodeRef, N> - { - using BaseNode_t<NodeRef,N>::children; - - struct Create - { - __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const - { - OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear(); - return NodeRef::encodeNode(node); - } - }; - - struct Set - { - __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const { - node.ungetAABBNodeMB()->setRef(i,child); - node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt)); - } - }; - - /*! Clears the node. */ - __forceinline void clear() - { - space0 = one; - //b0.lower = b0.upper = Vec3fa(nan); - b1.lower = b1.upper = Vec3fa(nan); - BaseNode_t<NodeRef,N>::clear(); - } - - /*! Sets space and bounding boxes. */ - __forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) { - setBounds(i,space,lbounds.bounds0,lbounds.bounds1); - } - - /*! Sets space and bounding boxes. */ - __forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c) - { - assert(i < N); - - AffineSpace3fa space = s0; - space.p -= a.lower; - Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower); - space = AffineSpace3fa::scale(scale)*space; - BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale); - BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale); - - space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z; - space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z; - space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z; - space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z; - - /*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z; - b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/ - - b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z; - b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z; - } - - /*! Sets ID of child. */ - __forceinline void setRef(size_t i, const NodeRef& ref) { - assert(i < N); - children[i] = ref; - } - - /*! Returns the extent of the bounds of the ith child */ - __forceinline Vec3fa extent0(size_t i) const { - assert(i < N); - const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]); - const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]); - const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]); - return rsqrt(vx*vx + vy*vy + vz*vz); - } - - public: - AffineSpace3vf<N> space0; - //BBox3vf<N> b0; // these are the unit bounds - BBox3vf<N> b1; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h deleted file mode 100644 index 5212821f3f..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh_node_base.h" - -namespace embree -{ - /*! BVHN Quantized Node */ - template<int N> - struct __aligned(8) QuantizedBaseNode_t - { - typedef unsigned char T; - static const T MIN_QUAN = 0; - static const T MAX_QUAN = 255; - - /*! Clears the node. */ - __forceinline void clear() { - for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN; - for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN; - } - - /*! Returns bounds of specified child. */ - __forceinline BBox3fa bounds(size_t i) const - { - assert(i < N); - const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x), - madd(scale.y,(float)lower_y[i],start.y), - madd(scale.z,(float)lower_z[i],start.z)); - const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x), - madd(scale.y,(float)upper_y[i],start.y), - madd(scale.z,(float)upper_z[i],start.z)); - return BBox3fa(lower,upper); - } - - /*! Returns extent of bounds of specified child. */ - __forceinline Vec3fa extent(size_t i) const { - return bounds(i).size(); - } - - static __forceinline void init_dim(const vfloat<N> &lower, - const vfloat<N> &upper, - T lower_quant[N], - T upper_quant[N], - float &start, - float &scale) - { - /* quantize bounds */ - const vbool<N> m_valid = lower != vfloat<N>(pos_inf); - const float minF = reduce_min(lower); - const float maxF = reduce_max(upper); - float diff = (1.0f+2.0f*float(ulp))*(maxF - minF); - float decode_scale = diff / float(MAX_QUAN); - if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero - assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF); - const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f; - vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN); - vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN); - - /* lower/upper correction */ - vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower; - vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper; - ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN); - iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN); - - /* disable invalid lanes */ - ilower = select(m_valid,ilower,MAX_QUAN); - iupper = select(m_valid,iupper,MIN_QUAN); - - /* store as uchar to memory */ - vint<N>::store(lower_quant,ilower); - vint<N>::store(upper_quant,iupper); - start = minF; - scale = decode_scale; - -#if defined(DEBUG) - vfloat<N> extract_lower( vint<N>::loadu(lower_quant) ); - vfloat<N> extract_upper( vint<N>::loadu(upper_quant) ); - vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF); - vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF); - assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid)); - assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid)); -#endif - } - - __forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node) - { - init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x); - init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y); - init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z); - } - - __forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); } - -#if defined(__AVX512F__) // KNL - __forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); } -#endif - __forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); } - - __forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); } - - __forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); } - - __forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); } - - __forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); } - - __forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); } - - template <int M> - __forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); } - -#if defined(__AVX512F__) - __forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); } - __forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); } - __forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); } -#endif - - union { - struct { - T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children - T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children - T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children - T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children - T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children - T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children - }; - T all_planes[6*N]; - }; - - Vec3f start; - Vec3f scale; - - friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n) - { - o << "QuantizedBaseNode { " << embree_endl; - o << " start " << n.start << embree_endl; - o << " scale " << n.scale << embree_endl; - o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl; - o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl; - o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl; - o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl; - o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl; - o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl; - o << "}" << embree_endl; - return o; - } - - }; - - template<typename NodeRef, int N> - struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N> - { - using BaseNode_t<NodeRef,N>::children; - using QuantizedBaseNode_t<N>::lower_x; - using QuantizedBaseNode_t<N>::upper_x; - using QuantizedBaseNode_t<N>::lower_y; - using QuantizedBaseNode_t<N>::upper_y; - using QuantizedBaseNode_t<N>::lower_z; - using QuantizedBaseNode_t<N>::upper_z; - using QuantizedBaseNode_t<N>::start; - using QuantizedBaseNode_t<N>::scale; - using QuantizedBaseNode_t<N>::init_dim; - - __forceinline void setRef(size_t i, const NodeRef& ref) { - assert(i < N); - children[i] = ref; - } - - struct Create2 - { - template<typename BuildRecord> - __forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const - { - __aligned(64) AABBNode_t<NodeRef,N> node; - node.clear(); - for (size_t i=0; i<n; i++) { - node.setBounds(i,children[i].bounds()); - } - QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment); - qnode->init(node); - - return (size_t)qnode | NodeRef::tyQuantizedNode; - } - }; - - struct Set2 - { - template<typename BuildRecord> - __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const - { - QuantizedNode_t* node = ref.quantizedNode(); - for (size_t i=0; i<num; i++) node->setRef(i,children[i]); - return ref; - } - }; - - __forceinline void init(AABBNode_t<NodeRef,N>& node) - { - for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode; - init_dim(node); - } - - }; - - /*! BVHN Quantized Node */ - template<int N> - struct __aligned(8) QuantizedBaseNodeMB_t - { - QuantizedBaseNode_t<N> node0; - QuantizedBaseNode_t<N> node1; - - /*! Clears the node. */ - __forceinline void clear() { - node0.clear(); - node1.clear(); - } - - /*! Returns bounds of specified child. */ - __forceinline BBox3fa bounds(size_t i) const - { - assert(i < N); - BBox3fa bounds0 = node0.bounds(i); - BBox3fa bounds1 = node1.bounds(i); - bounds0.extend(bounds1); - return bounds0; - } - - /*! Returns extent of bounds of specified child. */ - __forceinline Vec3fa extent(size_t i) const { - return bounds(i).size(); - } - - __forceinline vbool<N> validMask() const { return node0.validMask(); } - - template<typename T> - __forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); } - template<typename T> - __forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); } - template<typename T> - __forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); } - template<typename T> - __forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); } - template<typename T> - __forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); } - template<typename T> - __forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); } - - - template<int M> - __forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); } - template<int M> - __forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); } - template<int M> - __forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); } - template<int M> - __forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); } - template<int M> - __forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); } - template<int M> - __forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); } - - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h deleted file mode 100644 index 0f6d4dac7e..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/default.h" -#include "../common/alloc.h" -#include "../common/accel.h" -#include "../common/device.h" -#include "../common/scene.h" -#include "../geometry/primitive.h" -#include "../common/ray.h" - -namespace embree -{ - /* BVH node reference with bounds */ - template<typename NodeRef> - struct BVHNodeRecord - { - __forceinline BVHNodeRecord() {} - __forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds) : ref(ref), bounds((BBox3fx)bounds) {} - __forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds) : ref(ref), bounds(bounds) {} - - NodeRef ref; - BBox3fx bounds; - }; - - template<typename NodeRef> - struct BVHNodeRecordMB - { - __forceinline BVHNodeRecordMB() {} - __forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds) : ref(ref), lbounds(lbounds) {} - - NodeRef ref; - LBBox3fa lbounds; - }; - - template<typename NodeRef> - struct BVHNodeRecordMB4D - { - __forceinline BVHNodeRecordMB4D() {} - __forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt) : ref(ref), lbounds(lbounds), dt(dt) {} - - NodeRef ref; - LBBox3fa lbounds; - BBox1f dt; - }; - - template<typename NodeRef, int N> struct BaseNode_t; - template<typename NodeRef, int N> struct AABBNode_t; - template<typename NodeRef, int N> struct AABBNodeMB_t; - template<typename NodeRef, int N> struct AABBNodeMB4D_t; - template<typename NodeRef, int N> struct OBBNode_t; - template<typename NodeRef, int N> struct OBBNodeMB_t; - template<typename NodeRef, int N> struct QuantizedNode_t; - template<typename NodeRef, int N> struct QuantizedNodeMB_t; - - /*! Pointer that points to a node or a list of primitives */ - template<int N> - struct NodeRefPtr - { - //template<int NN> friend class BVHN; - - /*! Number of bytes the nodes and primitives are minimally aligned to.*/ - static const size_t byteAlignment = 16; - static const size_t byteNodeAlignment = 4*N; - - /*! highest address bit is used as barrier for some algorithms */ - static const size_t barrier_mask = (1LL << (8*sizeof(size_t)-1)); - - /*! Masks the bits that store the number of items per leaf. */ - static const size_t align_mask = byteAlignment-1; - static const size_t items_mask = byteAlignment-1; - - /*! different supported node types */ - static const size_t tyAABBNode = 0; - static const size_t tyAABBNodeMB = 1; - static const size_t tyAABBNodeMB4D = 6; - static const size_t tyOBBNode = 2; - static const size_t tyOBBNodeMB = 3; - static const size_t tyQuantizedNode = 5; - static const size_t tyLeaf = 8; - - /*! Empty node */ - static const size_t emptyNode = tyLeaf; - - /*! Invalid node, used as marker in traversal */ - static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0); - static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1); - - /*! Maximum number of primitive blocks in a leaf. */ - static const size_t maxLeafBlocks = items_mask-tyLeaf; - - /*! Default constructor */ - __forceinline NodeRefPtr () {} - - /*! Construction from integer */ - __forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {} - - /*! Cast to size_t */ - __forceinline operator size_t() const { return ptr; } - - /*! Sets the barrier bit. */ - __forceinline void setBarrier() { -#if defined(__X86_64__) || defined(__aarch64__) - assert(!isBarrier()); - ptr |= barrier_mask; -#else - assert(false); -#endif - } - - /*! Clears the barrier bit. */ - __forceinline void clearBarrier() { -#if defined(__X86_64__) || defined(__aarch64__) - ptr &= ~barrier_mask; -#else - assert(false); -#endif - } - - /*! Checks if this is an barrier. A barrier tells the top level tree rotations how deep to enter the tree. */ - __forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; } - - /*! checks if this is a leaf */ - __forceinline size_t isLeaf() const { return ptr & tyLeaf; } - - /*! returns node type */ - __forceinline int type() const { return ptr & (size_t)align_mask; } - - /*! checks if this is a node */ - __forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; } - - /*! checks if this is a motion blur node */ - __forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; } - - /*! checks if this is a 4D motion blur node */ - __forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; } - - /*! checks if this is a node with unaligned bounding boxes */ - __forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; } - - /*! checks if this is a motion blur node with unaligned bounding boxes */ - __forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; } - - /*! checks if this is a quantized node */ - __forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; } - - /*! Encodes a node */ - static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) { - assert(!((size_t)node & align_mask)); - return NodeRefPtr((size_t) node); - } - - static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) { - assert(!((size_t)node & align_mask)); - return NodeRefPtr((size_t) node | tyAABBNodeMB); - } - - static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) { - assert(!((size_t)node & align_mask)); - return NodeRefPtr((size_t) node | tyAABBNodeMB4D); - } - - /*! Encodes an unaligned node */ - static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) { - return NodeRefPtr((size_t) node | tyOBBNode); - } - - /*! Encodes an unaligned motion blur node */ - static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) { - return NodeRefPtr((size_t) node | tyOBBNodeMB); - } - - /*! Encodes a leaf */ - static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) { - assert(!((size_t)tri & align_mask)); - assert(num <= maxLeafBlocks); - return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks))); - } - - /*! Encodes a leaf */ - static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) { - assert(!((size_t)ptr & align_mask)); - return NodeRefPtr((size_t)ptr | (tyLeaf+ty)); - } - - /*! returns base node pointer */ - __forceinline BaseNode_t<NodeRefPtr,N>* baseNode() - { - assert(!isLeaf()); - return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); - } - __forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const - { - assert(!isLeaf()); - return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); - } - - /*! returns node pointer */ - __forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; } - __forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; } - - /*! returns motion blur node pointer */ - __forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - __forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - - /*! returns 4D motion blur node pointer */ - __forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - __forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - - /*! returns unaligned node pointer */ - __forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - __forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - - /*! returns unaligned motion blur node pointer */ - __forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - __forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); } - - /*! returns quantized node pointer */ - __forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); } - __forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); } - - /*! returns leaf pointer */ - __forceinline char* leaf(size_t& num) const { - assert(isLeaf()); - num = (ptr & (size_t)items_mask)-tyLeaf; - return (char*)(ptr & ~(size_t)align_mask); - } - - /*! clear all bit flags */ - __forceinline void clearFlags() { - ptr &= ~(size_t)align_mask; - } - - /*! returns the wideness */ - __forceinline size_t getN() const { return N; } - - public: - size_t ptr; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp deleted file mode 100644 index a273c21e8b..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_refit.h" -#include "bvh_statistics.h" - -#include "../geometry/linei.h" -#include "../geometry/triangle.h" -#include "../geometry/trianglev.h" -#include "../geometry/trianglei.h" -#include "../geometry/quadv.h" -#include "../geometry/object.h" -#include "../geometry/instance.h" - -namespace embree -{ - namespace isa - { - static const size_t SINGLE_THREAD_THRESHOLD = 4*1024; - - template<int N> - __forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b) - { - size_t sa = *(size_t*)&a->node()->lower_x; - size_t sb = *(size_t*)&b->node()->lower_x; - return sa < sb; - } - - template<int N> - BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds) - : bvh(bvh), leafBounds(leafBounds), numSubTrees(0) - { - } - - template<int N> - void BVHNRefitter<N>::refit() - { - if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) { - bvh->bounds = LBBox3fa(recurse_bottom(bvh->root)); - } - else - { - BBox3fa subTreeBounds[MAX_NUM_SUB_TREES]; - numSubTrees = 0; - gather_subtree_refs(bvh->root,numSubTrees,0); - if (numSubTrees) - parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - NodeRef& ref = subTrees[i]; - subTreeBounds[i] = recurse_bottom(ref); - } - }); - - numSubTrees = 0; - bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0)); - } - } - - template<int N> - void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref, - size_t &subtrees, - const size_t depth) - { - if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH) - { - assert(subtrees < MAX_NUM_SUB_TREES); - subTrees[subtrees++] = ref; - return; - } - - if (ref.isAABBNode()) - { - AABBNode* node = ref.getAABBNode(); - for (size_t i=0; i<N; i++) { - NodeRef& child = node->child(i); - if (unlikely(child == BVH::emptyNode)) continue; - gather_subtree_refs(child,subtrees,depth+1); - } - } - } - - template<int N> - BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref, - size_t &subtrees, - const BBox3fa *const subTreeBounds, - const size_t depth) - { - if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH) - { - assert(subtrees < MAX_NUM_SUB_TREES); - assert(subTrees[subtrees] == ref); - return subTreeBounds[subtrees++]; - } - - if (ref.isAABBNode()) - { - AABBNode* node = ref.getAABBNode(); - BBox3fa bounds[N]; - - for (size_t i=0; i<N; i++) - { - NodeRef& child = node->child(i); - - if (unlikely(child == BVH::emptyNode)) - bounds[i] = BBox3fa(empty); - else - bounds[i] = refit_toplevel(child,subtrees,subTreeBounds,depth+1); - } - - BBox3vf<N> boundsT = transpose<N>(bounds); - - /* set new bounds */ - node->lower_x = boundsT.lower.x; - node->lower_y = boundsT.lower.y; - node->lower_z = boundsT.lower.z; - node->upper_x = boundsT.upper.x; - node->upper_y = boundsT.upper.y; - node->upper_z = boundsT.upper.z; - - return merge<N>(bounds); - } - else - return leafBounds.leafBounds(ref); - } - - // ========================================================= - // ========================================================= - // ========================================================= - - - template<int N> - BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref) - { - /* this is a leaf node */ - if (unlikely(ref.isLeaf())) - return leafBounds.leafBounds(ref); - - /* recurse if this is an internal node */ - AABBNode* node = ref.getAABBNode(); - - /* enable exclusive prefetch for >= AVX platforms */ -#if defined(__AVX__) - BVH::prefetchW(ref); -#endif - BBox3fa bounds[N]; - - for (size_t i=0; i<N; i++) - if (unlikely(node->child(i) == BVH::emptyNode)) - { - bounds[i] = BBox3fa(empty); - } - else - bounds[i] = recurse_bottom(node->child(i)); - - /* AOS to SOA transform */ - BBox3vf<N> boundsT = transpose<N>(bounds); - - /* set new bounds */ - node->lower_x = boundsT.lower.x; - node->lower_y = boundsT.lower.y; - node->lower_z = boundsT.lower.z; - node->upper_x = boundsT.upper.x; - node->upper_y = boundsT.upper.y; - node->upper_z = boundsT.upper.z; - - return merge<N>(bounds); - } - - template<int N, typename Mesh, typename Primitive> - BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode) - : bvh(bvh), builder(builder), refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)), mesh(mesh), topologyVersion(0) {} - - template<int N, typename Mesh, typename Primitive> - void BVHNRefitT<N,Mesh,Primitive>::clear() - { - if (builder) - builder->clear(); - } - - template<int N, typename Mesh, typename Primitive> - void BVHNRefitT<N,Mesh,Primitive>::build() - { - if (mesh->topologyChanged(topologyVersion)) { - topologyVersion = mesh->getTopologyVersion(); - builder->build(); - } - else - refitter->refit(); - } - - template class BVHNRefitter<4>; -#if defined(__AVX__) - template class BVHNRefitter<8>; -#endif - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - - Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); } - Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } - Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } -#if defined(__AVX__) - Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode); - - Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); } - Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } - Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } - -#if defined(__AVX__) - Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode); - Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } -#endif - -#endif - -#if defined(EMBREE_GEOMETRY_USER) - Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode); - Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } - -#if defined(__AVX__) - Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode); - Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); } -#endif -#endif - -#if defined(EMBREE_GEOMETRY_INSTANCE) - Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode); - Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); } - -#if defined(__AVX__) - Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode); - Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); } -#endif -#endif - - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h deleted file mode 100644 index 4aa9bdd7cc..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../bvh/bvh.h" - -namespace embree -{ - namespace isa - { - template<int N> - class BVHNRefitter - { - public: - - /*! Type shortcuts */ - typedef BVHN<N> BVH; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::NodeRef NodeRef; - - struct LeafBoundsInterface { - virtual const BBox3fa leafBounds(NodeRef& ref) const = 0; - }; - - public: - - /*! Constructor. */ - BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds); - - /*! refits the BVH */ - void refit(); - - private: - /* single-threaded subtree extraction based on BVH depth */ - void gather_subtree_refs(NodeRef& ref, - size_t &subtrees, - const size_t depth = 0); - - /* single-threaded top-level refit */ - BBox3fa refit_toplevel(NodeRef& ref, - size_t &subtrees, - const BBox3fa *const subTreeBounds, - const size_t depth = 0); - - /* single-threaded subtree refit */ - BBox3fa recurse_bottom(NodeRef& ref); - - public: - BVH* bvh; //!< BVH to refit - const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves - - static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3; - static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH - size_t numSubTrees; - NodeRef subTrees[MAX_NUM_SUB_TREES]; - }; - - template<int N, typename Mesh, typename Primitive> - class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface - { - public: - - /*! Type shortcuts */ - typedef BVHN<N> BVH; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::NodeRef NodeRef; - - public: - BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode); - - virtual void build(); - - virtual void clear(); - - virtual const BBox3fa leafBounds (NodeRef& ref) const - { - size_t num; char* prim = ref.leaf(num); - if (unlikely(ref == BVH::emptyNode)) return empty; - - BBox3fa bounds = empty; - for (size_t i=0; i<num; i++) - bounds.extend(((Primitive*)prim)[i].update(mesh)); - return bounds; - } - - private: - BVH* bvh; - std::unique_ptr<Builder> builder; - std::unique_ptr<BVHNRefitter<N>> refitter; - Mesh* mesh; - unsigned int topologyVersion; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp deleted file mode 100644 index 2bb431bf0e..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_rotate.h" - -namespace embree -{ - namespace isa - { - /*! Computes half surface area of box. */ - __forceinline float halfArea3f(const BBox<vfloat4>& box) { - const vfloat4 d = box.size(); - const vfloat4 a = d*shuffle<1,2,0,3>(d); - return a[0]+a[1]+a[2]; - } - - size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth) - { - /*! nothing to rotate if we reached a leaf node. */ - if (parentRef.isBarrier()) return 0; - if (parentRef.isLeaf()) return 0; - AABBNode* parent = parentRef.getAABBNode(); - - /*! rotate all children first */ - vint4 cdepth; - for (size_t c=0; c<4; c++) - cdepth[c] = (int)rotate(parent->child(c),depth+1); - - /* compute current areas of all children */ - vfloat4 sizeX = parent->upper_x-parent->lower_x; - vfloat4 sizeY = parent->upper_y-parent->lower_y; - vfloat4 sizeZ = parent->upper_z-parent->lower_z; - vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ); - - /*! get node bounds */ - BBox<vfloat4> child1_0,child1_1,child1_2,child1_3; - parent->bounds(child1_0,child1_1,child1_2,child1_3); - - /*! Find best rotation. We pick a first child (child1) and a sub-child - (child2child) of a different second child (child2), and swap child1 - and child2child. We perform the best such swap. */ - float bestArea = 0; - size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1; - for (size_t c2=0; c2<4; c2++) - { - /*! ignore leaf nodes as we cannot descent into them */ - if (parent->child(c2).isBarrier()) continue; - if (parent->child(c2).isLeaf()) continue; - AABBNode* child2 = parent->child(c2).getAABBNode(); - - /*! transpose child bounds */ - BBox<vfloat4> child2c0,child2c1,child2c2,child2c3; - child2->bounds(child2c0,child2c1,child2c2,child2c3); - - /*! put child1_0 at each child2 position */ - float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3)); - float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3)); - float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3)); - float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0)); - vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03); - vfloat4 min0 = vreduce_min(cost0); - int pos0 = (int)bsf(movemask(min0 == cost0)); - - /*! put child1_1 at each child2 position */ - float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3)); - float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3)); - float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3)); - float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1)); - vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13); - vfloat4 min1 = vreduce_min(cost1); - int pos1 = (int)bsf(movemask(min1 == cost1)); - - /*! put child1_2 at each child2 position */ - float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3)); - float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3)); - float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3)); - float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2)); - vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23); - vfloat4 min2 = vreduce_min(cost2); - int pos2 = (int)bsf(movemask(min2 == cost2)); - - /*! put child1_3 at each child2 position */ - float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3)); - float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3)); - float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3)); - float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3)); - vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33); - vfloat4 min3 = vreduce_min(cost3); - int pos3 = (int)bsf(movemask(min3 == cost3)); - - /*! find best other child */ - vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]); - int pos[4] = { pos0,pos1,pos2,pos3 }; - const size_t mbd = BVH4::maxBuildDepth; - vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints - valid &= vint4(int(c2)) != vint4(step); - if (none(valid)) continue; - size_t c1 = select_min(valid,area0123); - float area = area0123[c1]; - if (c1 == c2) continue; // can happen if bounds are NANs - - /*! accept a swap when it reduces cost and is not swapping a node with itself */ - if (area < bestArea) { - bestArea = area; - bestChild1 = c1; - bestChild2 = c2; - bestChild2Child = pos[c1]; - } - } - - /*! if we did not find a swap that improves the SAH then do nothing */ - if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth); - - /*! perform the best found tree rotation */ - AABBNode* child2 = parent->child(bestChild2).getAABBNode(); - AABBNode::swap(parent,bestChild1,child2,bestChild2Child); - parent->setBounds(bestChild2,child2->bounds()); - AABBNode::compact(parent); - AABBNode::compact(child2); - - /*! This returned depth is conservative as the child that was - * pulled up in the tree could have been on the critical path. */ - cdepth[bestChild1]++; // bestChild1 was pushed down one level - return 1+reduce_max(cdepth); - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h deleted file mode 100644 index 009bef339e..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" - -namespace embree -{ - namespace isa - { - template<int N> - class BVHNRotate - { - typedef typename BVHN<N>::NodeRef NodeRef; - - public: - static const bool enabled = false; - - static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; } - static __forceinline void restructure(NodeRef ref, size_t depth = 1) {} - }; - - /* BVH4 tree rotations */ - template<> - class BVHNRotate<4> - { - typedef BVH4::AABBNode AABBNode; - typedef BVH4::NodeRef NodeRef; - - public: - static const bool enabled = true; - - static size_t rotate(NodeRef parentRef, size_t depth = 1); - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp deleted file mode 100644 index aa56035026..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "bvh_statistics.h" -#include "../../common/algorithms/parallel_reduce.h" - -namespace embree -{ - template<int N> - BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh) - { - double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea()); - stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f)); - } - - template<int N> - std::string BVHNStatistics<N>::str() - { - std::ostringstream stream; - stream.setf(std::ios::fixed, std::ios::floatfield); - stream << " primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl; - size_t totalBytes = stat.bytes(bvh); - double totalSAH = stat.sah(bvh); - stream << " total : sah = " << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), "; - stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), "; - stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), "; - stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl; - if (stat.statAABBNodes.numNodes ) stream << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl; - if (stat.statOBBNodes.numNodes ) stream << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl; - if (stat.statAABBNodesMB.numNodes ) stream << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl; - if (stat.statAABBNodesMB4D.numNodes) stream << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl; - if (stat.statOBBNodesMB.numNodes) stream << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl; - if (stat.statQuantizedNodes.numNodes ) stream << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl; - if (true) stream << " leaves : " << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl; - if (true) stream << " histogram : " << stat.statLeaf.histToString() << std::endl; - return stream.str(); - } - - template<int N> - typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1) - { - Statistics s; - assert(t0t1.size() > 0.0f); - double dt = max(0.0f,t0t1.size()); - if (node.isAABBNode()) - { - AABBNode* n = node.getAABBNode(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const double Ai = max(0.0f,halfArea(n->extend(i))); - Statistics s = statistics(n->child(i),Ai,t0t1); - s.statAABBNodes.numChildren++; - return s; - }, Statistics::add); - s.statAABBNodes.numNodes++; - s.statAABBNodes.nodeSAH += dt*A; - s.depth++; - } - else if (node.isOBBNode()) - { - OBBNode* n = node.ungetAABBNode(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const double Ai = max(0.0f,halfArea(n->extent(i))); - Statistics s = statistics(n->child(i),Ai,t0t1); - s.statOBBNodes.numChildren++; - return s; - }, Statistics::add); - s.statOBBNodes.numNodes++; - s.statOBBNodes.nodeSAH += dt*A; - s.depth++; - } - else if (node.isAABBNodeMB()) - { - AABBNodeMB* n = node.getAABBNodeMB(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1)); - Statistics s = statistics(n->child(i),Ai,t0t1); - s.statAABBNodesMB.numChildren++; - return s; - }, Statistics::add); - s.statAABBNodesMB.numNodes++; - s.statAABBNodesMB.nodeSAH += dt*A; - s.depth++; - } - else if (node.isAABBNodeMB4D()) - { - AABBNodeMB4D* n = node.getAABBNodeMB4D(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const BBox1f t0t1i = intersect(t0t1,n->timeRange(i)); - assert(!t0t1i.empty()); - const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i); - Statistics s = statistics(n->child(i),Ai,t0t1i); - s.statAABBNodesMB4D.numChildren++; - return s; - }, Statistics::add); - s.statAABBNodesMB4D.numNodes++; - s.statAABBNodesMB4D.nodeSAH += dt*A; - s.depth++; - } - else if (node.isOBBNodeMB()) - { - OBBNodeMB* n = node.ungetAABBNodeMB(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const double Ai = max(0.0f,halfArea(n->extent0(i))); - Statistics s = statistics(n->child(i),Ai,t0t1); - s.statOBBNodesMB.numChildren++; - return s; - }, Statistics::add); - s.statOBBNodesMB.numNodes++; - s.statOBBNodesMB.nodeSAH += dt*A; - s.depth++; - } - else if (node.isQuantizedNode()) - { - QuantizedNode* n = node.quantizedNode(); - s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) { - if (n->child(i) == BVH::emptyNode) return Statistics(); - const double Ai = max(0.0f,halfArea(n->extent(i))); - Statistics s = statistics(n->child(i),Ai,t0t1); - s.statQuantizedNodes.numChildren++; - return s; - }, Statistics::add); - s.statQuantizedNodes.numNodes++; - s.statQuantizedNodes.nodeSAH += dt*A; - s.depth++; - } - else if (node.isLeaf()) - { - size_t num; const char* tri = node.leaf(num); - if (num) - { - for (size_t i=0; i<num; i++) - { - const size_t bytes = bvh->primTy->getBytes(tri); - s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri); - s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri); - s.statLeaf.numBytes += bytes; - tri+=bytes; - } - s.statLeaf.numLeaves++; - s.statLeaf.numPrimBlocks += num; - s.statLeaf.leafSAH += dt*A*num; - if (num-1 < Statistics::LeafStat::NHIST) { - s.statLeaf.numPrimBlocksHistogram[num-1]++; - } - } - } - else { - // -- GODOT start -- - // throw std::runtime_error("not supported node type in bvh_statistics"); - abort(); - // -- GODOT end -- - } - return s; - } - -#if defined(__AVX__) - template class BVHNStatistics<8>; -#endif - -#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__) - template class BVHNStatistics<4>; -#endif -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h deleted file mode 100644 index 73dfc6fbcc..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include <sstream> - -namespace embree -{ - template<int N> - class BVHNStatistics - { - typedef BVHN<N> BVH; - typedef typename BVH::AABBNode AABBNode; - typedef typename BVH::OBBNode OBBNode; - typedef typename BVH::AABBNodeMB AABBNodeMB; - typedef typename BVH::AABBNodeMB4D AABBNodeMB4D; - typedef typename BVH::OBBNodeMB OBBNodeMB; - typedef typename BVH::QuantizedNode QuantizedNode; - - typedef typename BVH::NodeRef NodeRef; - - struct Statistics - { - template<typename Node> - struct NodeStat - { - NodeStat ( double nodeSAH = 0, - size_t numNodes = 0, - size_t numChildren = 0) - : nodeSAH(nodeSAH), - numNodes(numNodes), - numChildren(numChildren) {} - - double sah(BVH* bvh) const { - return nodeSAH/bvh->getLinearBounds().expectedHalfArea(); - } - - size_t bytes() const { - return numNodes*sizeof(Node); - } - - size_t size() const { - return numNodes; - } - - double fillRateNom () const { return double(numChildren); } - double fillRateDen () const { return double(numNodes*N); } - double fillRate () const { return fillRateNom()/fillRateDen(); } - - __forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b) - { - return NodeStat(a.nodeSAH + b.nodeSAH, - a.numNodes+b.numNodes, - a.numChildren+b.numChildren); - } - - std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const - { - std::ostringstream stream; - stream.setf(std::ios::fixed, std::ios::floatfield); - stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh); - stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), "; - stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB "; - stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), "; - stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), "; - stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives); - return stream.str(); - } - - public: - double nodeSAH; - size_t numNodes; - size_t numChildren; - }; - - struct LeafStat - { - static const int NHIST = 8; - - LeafStat ( double leafSAH = 0.0f, - size_t numLeaves = 0, - size_t numPrimsActive = 0, - size_t numPrimsTotal = 0, - size_t numPrimBlocks = 0, - size_t numBytes = 0) - : leafSAH(leafSAH), - numLeaves(numLeaves), - numPrimsActive(numPrimsActive), - numPrimsTotal(numPrimsTotal), - numPrimBlocks(numPrimBlocks), - numBytes(numBytes) - { - for (size_t i=0; i<NHIST; i++) - numPrimBlocksHistogram[i] = 0; - } - - double sah(BVH* bvh) const { - return leafSAH/bvh->getLinearBounds().expectedHalfArea(); - } - - size_t bytes(BVH* bvh) const { - return numBytes; - } - - size_t size() const { - return numLeaves; - } - - double fillRateNom (BVH* bvh) const { return double(numPrimsActive); } - double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); } - double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); } - - __forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b) - { - LeafStat stat(a.leafSAH + b.leafSAH, - a.numLeaves+b.numLeaves, - a.numPrimsActive+b.numPrimsActive, - a.numPrimsTotal+b.numPrimsTotal, - a.numPrimBlocks+b.numPrimBlocks, - a.numBytes+b.numBytes); - for (size_t i=0; i<NHIST; i++) { - stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i]; - stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i]; - } - return stat; - } - - std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const - { - std::ostringstream stream; - stream.setf(std::ios::fixed, std::ios::floatfield); - stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh); - stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), "; - stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB "; - stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), "; - stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), "; - stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives); - return stream.str(); - } - - std::string histToString() const - { - std::ostringstream stream; - stream.setf(std::ios::fixed, std::ios::floatfield); - for (size_t i=0; i<NHIST; i++) - stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% "; - return stream.str(); - } - - public: - double leafSAH; //!< SAH of the leaves only - size_t numLeaves; //!< Number of leaf nodes. - size_t numPrimsActive; //!< Number of active primitives ( - size_t numPrimsTotal; //!< Number of active and inactive primitives - size_t numPrimBlocks; //!< Number of primitive blocks. - size_t numBytes; //!< Number of bytes of leaves. - size_t numPrimBlocksHistogram[8]; - }; - - public: - Statistics (size_t depth = 0, - LeafStat statLeaf = LeafStat(), - NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(), - NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(), - NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(), - NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(), - NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(), - NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>()) - - : depth(depth), - statLeaf(statLeaf), - statAABBNodes(statAABBNodes), - statOBBNodes(statOBBNodes), - statAABBNodesMB(statAABBNodesMB), - statAABBNodesMB4D(statAABBNodesMB4D), - statOBBNodesMB(statOBBNodesMB), - statQuantizedNodes(statQuantizedNodes) {} - - double sah(BVH* bvh) const - { - return statLeaf.sah(bvh) + - statAABBNodes.sah(bvh) + - statOBBNodes.sah(bvh) + - statAABBNodesMB.sah(bvh) + - statAABBNodesMB4D.sah(bvh) + - statOBBNodesMB.sah(bvh) + - statQuantizedNodes.sah(bvh); - } - - size_t bytes(BVH* bvh) const { - return statLeaf.bytes(bvh) + - statAABBNodes.bytes() + - statOBBNodes.bytes() + - statAABBNodesMB.bytes() + - statAABBNodesMB4D.bytes() + - statOBBNodesMB.bytes() + - statQuantizedNodes.bytes(); - } - - size_t size() const - { - return statLeaf.size() + - statAABBNodes.size() + - statOBBNodes.size() + - statAABBNodesMB.size() + - statAABBNodesMB4D.size() + - statOBBNodesMB.size() + - statQuantizedNodes.size(); - } - - double fillRate (BVH* bvh) const - { - double nom = statLeaf.fillRateNom(bvh) + - statAABBNodes.fillRateNom() + - statOBBNodes.fillRateNom() + - statAABBNodesMB.fillRateNom() + - statAABBNodesMB4D.fillRateNom() + - statOBBNodesMB.fillRateNom() + - statQuantizedNodes.fillRateNom(); - double den = statLeaf.fillRateDen(bvh) + - statAABBNodes.fillRateDen() + - statOBBNodes.fillRateDen() + - statAABBNodesMB.fillRateDen() + - statAABBNodesMB4D.fillRateDen() + - statOBBNodesMB.fillRateDen() + - statQuantizedNodes.fillRateDen(); - return nom/den; - } - - friend Statistics operator+ ( const Statistics& a, const Statistics& b ) - { - return Statistics(max(a.depth,b.depth), - a.statLeaf + b.statLeaf, - a.statAABBNodes + b.statAABBNodes, - a.statOBBNodes + b.statOBBNodes, - a.statAABBNodesMB + b.statAABBNodesMB, - a.statAABBNodesMB4D + b.statAABBNodesMB4D, - a.statOBBNodesMB + b.statOBBNodesMB, - a.statQuantizedNodes + b.statQuantizedNodes); - } - - static Statistics add ( const Statistics& a, const Statistics& b ) { - return a+b; - } - - public: - size_t depth; - LeafStat statLeaf; - NodeStat<AABBNode> statAABBNodes; - NodeStat<OBBNode> statOBBNodes; - NodeStat<AABBNodeMB> statAABBNodesMB; - NodeStat<AABBNodeMB4D> statAABBNodesMB4D; - NodeStat<OBBNodeMB> statOBBNodesMB; - NodeStat<QuantizedNode> statQuantizedNodes; - }; - - public: - - /* Constructor gathers statistics. */ - BVHNStatistics (BVH* bvh); - - /*! Convert statistics into a string */ - std::string str(); - - double sah() const { - return stat.sah(bvh); - } - - size_t bytesUsed() const { - return stat.bytes(bvh); - } - - private: - Statistics statistics(NodeRef node, const double A, const BBox1f dt); - - private: - BVH* bvh; - Statistics stat; - }; - - typedef BVHNStatistics<4> BVH4Statistics; - typedef BVHNStatistics<8> BVH8Statistics; -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h deleted file mode 100644 index 7f17084b81..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h +++ /dev/null @@ -1,676 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "node_intersector1.h" -#include "../common/stack_item.h" - -#define NEW_SORTING_CODE 1 - -namespace embree -{ - namespace isa - { - /*! BVH regular node traversal for single rays. */ - template<int N, int Nx, int types> - class BVHNNodeTraverser1Hit; - - /*! Helper functions for fast sorting using AVX512 instructions. */ -#if defined(__AVX512ER__) - - /* KNL code path */ - __forceinline void isort_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p) - { - const vfloat16 dist_shift = align_shift_right<15>(dist,dist); - const vllong8 ptr_shift = align_shift_right<7>(ptr,ptr); - const vbool16 m_geq = d >= dist; - const vbool16 m_geq_shift = m_geq << 1; - dist = select(m_geq,d,dist); - ptr = select(vboold8(m_geq),p,ptr); - dist = select(m_geq_shift,dist_shift,dist); - ptr = select(vboold8(m_geq_shift),ptr_shift,ptr); - } - - __forceinline void isort_quick_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p) - { - //dist = align_shift_right<15>(dist,d); - //ptr = align_shift_right<7>(ptr,p); - dist = align_shift_right<15>(dist,permute(d,vint16(zero))); - ptr = align_shift_right<7>(ptr,permute(p,vllong8(zero))); - } - - template<int N, int Nx, int types, class NodeRef, class BaseNode> - __forceinline void traverseClosestHitAVX512(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - StackItemT<NodeRef>*& stackPtr, - StackItemT<NodeRef>* stackEnd) - { - assert(mask != 0); - const BaseNode* node = cur.baseNode(); - - vllong8 children( vllong<N>::loadu((void*)node->children) ); - children = vllong8::compact((int)mask,children); - vfloat16 distance = tNear; - distance = vfloat16::compact((int)mask,distance,tNear); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - mask &= mask-1; - if (likely(mask == 0)) return; - - /* 2 hits: order A0 B0 */ - const vllong8 c0(children); - const vfloat16 d0(distance); - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - const vllong8 c1(children); - const vfloat16 d1(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - /* a '<' keeps the order for equal distances, scenes like powerplant largely benefit from it */ - const vboolf16 m_dist = d0 < d1; - const vfloat16 dist_A0 = select(m_dist, d0, d1); - const vfloat16 dist_B0 = select(m_dist, d1, d0); - const vllong8 ptr_A0 = select(vboold8(m_dist), c0, c1); - const vllong8 ptr_B0 = select(vboold8(m_dist), c1, c0); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A0); - stackPtr[0].ptr = toScalar(ptr_B0); - *(float*)&stackPtr[0].dist = toScalar(dist_B0); - stackPtr++; - return; - } - - /* 3 hits: order A1 B1 C1 */ - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - const vllong8 c2(children); - const vfloat16 d2(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vboolf16 m_dist1 = dist_A0 <= d2; - const vfloat16 dist_tmp_B1 = select(m_dist1, d2, dist_A0); - const vllong8 ptr_A1 = select(vboold8(m_dist1), ptr_A0, c2); - const vllong8 ptr_tmp_B1 = select(vboold8(m_dist1), c2, ptr_A0); - - const vboolf16 m_dist2 = dist_B0 <= dist_tmp_B1; - const vfloat16 dist_B1 = select(m_dist2, dist_B0 , dist_tmp_B1); - const vfloat16 dist_C1 = select(m_dist2, dist_tmp_B1, dist_B0); - const vllong8 ptr_B1 = select(vboold8(m_dist2), ptr_B0, ptr_tmp_B1); - const vllong8 ptr_C1 = select(vboold8(m_dist2), ptr_tmp_B1, ptr_B0); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A1); - stackPtr[0].ptr = toScalar(ptr_C1); - *(float*)&stackPtr[0].dist = toScalar(dist_C1); - stackPtr[1].ptr = toScalar(ptr_B1); - *(float*)&stackPtr[1].dist = toScalar(dist_B1); - stackPtr+=2; - return; - } - - /* 4 hits: order A2 B2 C2 D2 */ - - const vfloat16 dist_A1 = select(m_dist1, dist_A0, d2); - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - const vllong8 c3(children); - const vfloat16 d3(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vboolf16 m_dist3 = dist_A1 <= d3; - const vfloat16 dist_tmp_B2 = select(m_dist3, d3, dist_A1); - const vllong8 ptr_A2 = select(vboold8(m_dist3), ptr_A1, c3); - const vllong8 ptr_tmp_B2 = select(vboold8(m_dist3), c3, ptr_A1); - - const vboolf16 m_dist4 = dist_B1 <= dist_tmp_B2; - const vfloat16 dist_B2 = select(m_dist4, dist_B1 , dist_tmp_B2); - const vfloat16 dist_tmp_C2 = select(m_dist4, dist_tmp_B2, dist_B1); - const vllong8 ptr_B2 = select(vboold8(m_dist4), ptr_B1, ptr_tmp_B2); - const vllong8 ptr_tmp_C2 = select(vboold8(m_dist4), ptr_tmp_B2, ptr_B1); - - const vboolf16 m_dist5 = dist_C1 <= dist_tmp_C2; - const vfloat16 dist_C2 = select(m_dist5, dist_C1 , dist_tmp_C2); - const vfloat16 dist_D2 = select(m_dist5, dist_tmp_C2, dist_C1); - const vllong8 ptr_C2 = select(vboold8(m_dist5), ptr_C1, ptr_tmp_C2); - const vllong8 ptr_D2 = select(vboold8(m_dist5), ptr_tmp_C2, ptr_C1); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A2); - stackPtr[0].ptr = toScalar(ptr_D2); - *(float*)&stackPtr[0].dist = toScalar(dist_D2); - stackPtr[1].ptr = toScalar(ptr_C2); - *(float*)&stackPtr[1].dist = toScalar(dist_C2); - stackPtr[2].ptr = toScalar(ptr_B2); - *(float*)&stackPtr[2].dist = toScalar(dist_B2); - stackPtr+=3; - return; - } - - /* >=5 hits: reverse to descending order for writing to stack */ - - const size_t hits = 4 + popcnt(mask); - const vfloat16 dist_A2 = select(m_dist3, dist_A1, d3); - vfloat16 dist(neg_inf); - vllong8 ptr(zero); - - - isort_quick_update(dist,ptr,dist_A2,ptr_A2); - isort_quick_update(dist,ptr,dist_B2,ptr_B2); - isort_quick_update(dist,ptr,dist_C2,ptr_C2); - isort_quick_update(dist,ptr,dist_D2,ptr_D2); - - do { - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vfloat16 new_dist(permute(distance,vint16(zero))); - const vllong8 new_ptr(permute(children,vllong8(zero))); - - mask &= mask-1; - isort_update(dist,ptr,new_dist,new_ptr); - - } while(mask); - - const vboold8 m_stack_ptr(0x55); // 10101010 (lsb -> msb) - const vboolf16 m_stack_dist(0x4444); // 0010001000100010 (lsb -> msb) - - /* extract current noderef */ - cur = toScalar(permute(ptr,vllong8(hits-1))); - /* rearrange pointers to beginning of 16 bytes block */ - vllong8 stackElementA0; - stackElementA0 = vllong8::expand(m_stack_ptr,ptr,stackElementA0); - /* put distances in between */ - vuint16 stackElementA1((__m512i)stackElementA0); - stackElementA1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementA1); - /* write out first 4 x 16 bytes block to stack */ - vuint16::storeu(stackPtr,stackElementA1); - /* get upper half of dist and ptr */ - dist = align_shift_right<4>(dist,dist); - ptr = align_shift_right<4>(ptr,ptr); - /* assemble and write out second block */ - vllong8 stackElementB0; - stackElementB0 = vllong8::expand(m_stack_ptr,ptr,stackElementB0); - vuint16 stackElementB1((__m512i)stackElementB0); - stackElementB1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementB1); - vuint16::storeu(stackPtr + 4,stackElementB1); - /* increase stack pointer */ - stackPtr += hits-1; - } -#endif - -#if defined(__AVX512VL__) // SKX - - template<int N> - __forceinline void isort_update(vint<N> &dist, const vint<N> &d) - { - const vint<N> dist_shift = align_shift_right<N-1>(dist,dist); - const vboolf<N> m_geq = d >= dist; - const vboolf<N> m_geq_shift = m_geq << 1; - dist = select(m_geq,d,dist); - dist = select(m_geq_shift,dist_shift,dist); - } - - template<int N> - __forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) { - dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero))); - } - - __forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) { - return toScalar(permutex2var((__m256i)index,n0,n1)); - } - - __forceinline float permuteExtract(const vint8& index, const vfloat8& n) { - return toScalar(permute(n,index)); - } - -#endif - - /* Specialization for BVH4. */ - template<int Nx, int types> - class BVHNNodeTraverser1Hit<4, Nx, types> - { - typedef BVH4 BVH; - typedef BVH4::NodeRef NodeRef; - typedef BVH4::BaseNode BaseNode; - - - public: - /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */ - static __forceinline void traverseClosestHit(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - StackItemT<NodeRef>*& stackPtr, - StackItemT<NodeRef>* stackEnd) - { - assert(mask != 0); -#if defined(__AVX512ER__) - traverseClosestHitAVX512<4,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); -#else - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - size_t r = bscf(mask); - cur = node->child(r); - BVH::prefetch(cur,types); - if (likely(mask == 0)) { - assert(cur != BVH::emptyNode); - return; - } - - /*! two children are hit, push far child, and continue with closer child */ - NodeRef c0 = cur; - const unsigned int d0 = ((unsigned int*)&tNear)[r]; - r = bscf(mask); - NodeRef c1 = node->child(r); - BVH::prefetch(c1,types); - const unsigned int d1 = ((unsigned int*)&tNear)[r]; - assert(c0 != BVH::emptyNode); - assert(c1 != BVH::emptyNode); - if (likely(mask == 0)) { - assert(stackPtr < stackEnd); - if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; } - else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; } - } - -#if NEW_SORTING_CODE == 1 - vint4 s0((size_t)c0,(size_t)d0); - vint4 s1((size_t)c1,(size_t)d1); - r = bscf(mask); - NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r]; - vint4 s2((size_t)c2,(size_t)d2); - /* 3 hits */ - if (likely(mask == 0)) { - StackItemT<NodeRef>::sort3(s0,s1,s2); - *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; - cur = toSizeT(s2); - stackPtr+=2; - return; - } - r = bscf(mask); - NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r]; - vint4 s3((size_t)c3,(size_t)d3); - /* 4 hits */ - StackItemT<NodeRef>::sort4(s0,s1,s2,s3); - *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; - cur = toSizeT(s3); - stackPtr+=3; -#else - /*! Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. */ - assert(stackPtr < stackEnd); - stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; - assert(stackPtr < stackEnd); - stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; - - /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ - assert(stackPtr < stackEnd); - r = bscf(mask); - NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; - assert(c != BVH::emptyNode); - if (likely(mask == 0)) { - sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; - return; - } - - /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ - assert(stackPtr < stackEnd); - r = bscf(mask); - c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; - assert(c != BVH::emptyNode); - sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; -#endif -#endif - } - - /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */ - static __forceinline void traverseAnyHit(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - NodeRef*& stackPtr, - NodeRef* stackEnd) - { - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - size_t r = bscf(mask); - cur = node->child(r); - BVH::prefetch(cur,types); - - /* simpler in sequence traversal order */ - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - assert(stackPtr < stackEnd); - *stackPtr = cur; stackPtr++; - - for (; ;) - { - r = bscf(mask); - cur = node->child(r); BVH::prefetch(cur,types); - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - assert(stackPtr < stackEnd); - *stackPtr = cur; stackPtr++; - } - } - }; - - /* Specialization for BVH8. */ - template<int Nx, int types> - class BVHNNodeTraverser1Hit<8, Nx, types> - { - typedef BVH8 BVH; - typedef BVH8::NodeRef NodeRef; - typedef BVH8::BaseNode BaseNode; - -#if defined(__AVX512VL__) - template<class NodeRef, class BaseNode> - static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur, - size_t mask, - const vfloat8& tNear, - StackItemT<NodeRef>*& stackPtr, - StackItemT<NodeRef>* stackEnd) - { - assert(mask != 0); - const BaseNode* node = cur.baseNode(); - const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]); - const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]); - vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step); - distance_i = vint8::compact((int)mask,distance_i,distance_i); - cur = permuteExtract(distance_i,n0,n1); - BVH::prefetch(cur,types); - - mask &= mask-1; - if (likely(mask == 0)) return; - - /* 2 hits: order A0 B0 */ - const vint8 d0(distance_i); - const vint8 d1(shuffle<1>(distance_i)); - cur = permuteExtract(d1,n0,n1); - BVH::prefetch(cur,types); - - const vint8 dist_A0 = min(d0, d1); - const vint8 dist_B0 = max(d0, d1); - assert(dist_A0[0] < dist_B0[0]); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = permuteExtract(dist_A0,n0,n1); - stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1); - *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear); - stackPtr++; - return; - } - - /* 3 hits: order A1 B1 C1 */ - - const vint8 d2(shuffle<2>(distance_i)); - cur = permuteExtract(d2,n0,n1); - BVH::prefetch(cur,types); - - const vint8 dist_A1 = min(dist_A0,d2); - const vint8 dist_tmp_B1 = max(dist_A0,d2); - const vint8 dist_B1 = min(dist_B0,dist_tmp_B1); - const vint8 dist_C1 = max(dist_B0,dist_tmp_B1); - assert(dist_A1[0] < dist_B1[0]); - assert(dist_B1[0] < dist_C1[0]); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = permuteExtract(dist_A1,n0,n1); - stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1); - *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear); - stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1); - *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear); - stackPtr+=2; - return; - } - - /* 4 hits: order A2 B2 C2 D2 */ - - const vint8 d3(shuffle<3>(distance_i)); - cur = permuteExtract(d3,n0,n1); - BVH::prefetch(cur,types); - - const vint8 dist_A2 = min(dist_A1,d3); - const vint8 dist_tmp_B2 = max(dist_A1,d3); - const vint8 dist_B2 = min(dist_B1,dist_tmp_B2); - const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2); - const vint8 dist_C2 = min(dist_C1,dist_tmp_C2); - const vint8 dist_D2 = max(dist_C1,dist_tmp_C2); - assert(dist_A2[0] < dist_B2[0]); - assert(dist_B2[0] < dist_C2[0]); - assert(dist_C2[0] < dist_D2[0]); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = permuteExtract(dist_A2,n0,n1); - stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1); - *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear); - stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1); - *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear); - stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1); - *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear); - stackPtr+=3; - return; - } - - /* >=5 hits: reverse to descending order for writing to stack */ - - distance_i = align_shift_right<3>(distance_i,distance_i); - const size_t hits = 4 + popcnt(mask); - vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert - - isort_quick_update(dist,dist_A2); - isort_quick_update(dist,dist_B2); - isort_quick_update(dist,dist_C2); - isort_quick_update(dist,dist_D2); - - do { - - distance_i = align_shift_right<1>(distance_i,distance_i); - cur = permuteExtract(distance_i,n0,n1); - BVH::prefetch(cur,types); - const vint8 new_dist(permute(distance_i,vint8(zero))); - mask &= mask-1; - isort_update(dist,new_dist); - - } while(mask); - - for (size_t i=0; i<7; i++) - assert(dist[i+0]>=dist[i+1]); - - for (size_t i=0;i<hits-1;i++) - { - stackPtr->ptr = permuteExtract(dist,n0,n1); - *(float*)&stackPtr->dist = permuteExtract(dist,tNear); - dist = align_shift_right<1>(dist,dist); - stackPtr++; - } - cur = permuteExtract(dist,n0,n1); - } -#endif - - public: - static __forceinline void traverseClosestHit(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - StackItemT<NodeRef>*& stackPtr, - StackItemT<NodeRef>* stackEnd) - { - assert(mask != 0); -#if defined(__AVX512ER__) - traverseClosestHitAVX512<8,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); -#elif defined(__AVX512VL__) - traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); -#else - - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - size_t r = bscf(mask); - cur = node->child(r); - BVH::prefetch(cur,types); - if (likely(mask == 0)) { - assert(cur != BVH::emptyNode); - return; - } - - /*! two children are hit, push far child, and continue with closer child */ - NodeRef c0 = cur; - const unsigned int d0 = ((unsigned int*)&tNear)[r]; - r = bscf(mask); - NodeRef c1 = node->child(r); - BVH::prefetch(c1,types); - const unsigned int d1 = ((unsigned int*)&tNear)[r]; - - assert(c0 != BVH::emptyNode); - assert(c1 != BVH::emptyNode); - if (likely(mask == 0)) { - assert(stackPtr < stackEnd); - if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; } - else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; } - } -#if NEW_SORTING_CODE == 1 - vint4 s0((size_t)c0,(size_t)d0); - vint4 s1((size_t)c1,(size_t)d1); - - r = bscf(mask); - NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r]; - vint4 s2((size_t)c2,(size_t)d2); - /* 3 hits */ - if (likely(mask == 0)) { - StackItemT<NodeRef>::sort3(s0,s1,s2); - *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; - cur = toSizeT(s2); - stackPtr+=2; - return; - } - r = bscf(mask); - NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r]; - vint4 s3((size_t)c3,(size_t)d3); - /* 4 hits */ - if (likely(mask == 0)) { - StackItemT<NodeRef>::sort4(s0,s1,s2,s3); - *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; - cur = toSizeT(s3); - stackPtr+=3; - return; - } - *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3; - /*! fallback case if more than 4 children are hit */ - StackItemT<NodeRef>* stackFirst = stackPtr; - stackPtr+=4; - while (1) - { - assert(stackPtr < stackEnd); - r = bscf(mask); - NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r]; - const vint4 s((size_t)c,(size_t)d); - *(vint4*)stackPtr++ = s; - assert(c != BVH::emptyNode); - if (unlikely(mask == 0)) break; - } - sort(stackFirst,stackPtr); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; -#else - /*! Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. */ - assert(stackPtr < stackEnd); - stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; - assert(stackPtr < stackEnd); - stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; - - /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ - assert(stackPtr < stackEnd); - r = bscf(mask); - NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; - assert(c != BVH::emptyNode); - if (likely(mask == 0)) { - sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; - return; - } - - /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ - assert(stackPtr < stackEnd); - r = bscf(mask); - c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; - assert(c != BVH::emptyNode); - if (likely(mask == 0)) { - sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; - return; - } - /*! fallback case if more than 4 children are hit */ - StackItemT<NodeRef>* stackFirst = stackPtr-4; - while (1) - { - assert(stackPtr < stackEnd); - r = bscf(mask); - c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; - assert(c != BVH::emptyNode); - if (unlikely(mask == 0)) break; - } - sort(stackFirst,stackPtr); - cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; -#endif -#endif - } - - static __forceinline void traverseAnyHit(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - NodeRef*& stackPtr, - NodeRef* stackEnd) - { - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - size_t r = bscf(mask); - cur = node->child(r); - BVH::prefetch(cur,types); - - /* simpler in sequence traversal order */ - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - assert(stackPtr < stackEnd); - *stackPtr = cur; stackPtr++; - - for (; ;) - { - r = bscf(mask); - cur = node->child(r); BVH::prefetch(cur,types); - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - assert(stackPtr < stackEnd); - *stackPtr = cur; stackPtr++; - } - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h deleted file mode 100644 index 9c603babf0..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" -#include "../common/ray.h" -#include "../common/stack_item.h" - -namespace embree -{ - namespace isa - { - template<int N, int Nx, int types> - class BVHNNodeTraverserStreamHitCoherent - { - typedef BVHN<N> BVH; - typedef typename BVH::NodeRef NodeRef; - typedef typename BVH::BaseNode BaseNode; - - public: - template<class T> - static __forceinline void traverseClosestHit(NodeRef& cur, - size_t& m_trav_active, - const vbool<Nx>& vmask, - const vfloat<Nx>& tNear, - const T* const tMask, - StackItemMaskCoherent*& stackPtr) - { - const NodeRef parent = cur; - size_t mask = movemask(vmask); - assert(mask != 0); - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - const size_t r0 = bscf(mask); - assert(r0 < 8); - cur = node->child(r0); - BVHN<N>::prefetch(cur,types); - m_trav_active = tMask[r0]; - assert(cur != BVH::emptyNode); - if (unlikely(mask == 0)) return; - - const unsigned int* const tNear_i = (unsigned int*)&tNear; - - /*! two children are hit, push far child, and continue with closer child */ - NodeRef c0 = cur; - unsigned int d0 = tNear_i[r0]; - const size_t r1 = bscf(mask); - assert(r1 < 8); - NodeRef c1 = node->child(r1); - BVHN<N>::prefetch(c1,types); - unsigned int d1 = tNear_i[r1]; - - assert(c0 != BVH::emptyNode); - assert(c1 != BVH::emptyNode); - if (likely(mask == 0)) { - if (d0 < d1) { - assert(tNear[r1] >= 0.0f); - stackPtr->mask = tMask[r1]; - stackPtr->parent = parent; - stackPtr->child = c1; - stackPtr++; - cur = c0; - m_trav_active = tMask[r0]; - return; - } - else { - assert(tNear[r0] >= 0.0f); - stackPtr->mask = tMask[r0]; - stackPtr->parent = parent; - stackPtr->child = c0; - stackPtr++; - cur = c1; - m_trav_active = tMask[r1]; - return; - } - } - - /*! slow path for more than two hits */ - size_t hits = movemask(vmask); - const vint<Nx> dist_i = select(vmask, (asInt(tNear) & 0xfffffff8) | vint<Nx>(step), 0); - #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - const vint<N> tmp = extractN<N,0>(dist_i); - const vint<Nx> dist_i_sorted = usort_descending(tmp); - #else - const vint<Nx> dist_i_sorted = usort_descending(dist_i); - #endif - const vint<Nx> sorted_index = dist_i_sorted & 7; - - size_t i = 0; - for (;;) - { - const unsigned int index = sorted_index[i]; - assert(index < 8); - cur = node->child(index); - m_trav_active = tMask[index]; - assert(m_trav_active); - BVHN<N>::prefetch(cur,types); - bscf(hits); - if (unlikely(hits==0)) break; - i++; - assert(cur != BVH::emptyNode); - assert(tNear[index] >= 0.0f); - stackPtr->mask = m_trav_active; - stackPtr->parent = parent; - stackPtr->child = cur; - stackPtr++; - } - } - - template<class T> - static __forceinline void traverseAnyHit(NodeRef& cur, - size_t& m_trav_active, - const vbool<Nx>& vmask, - const T* const tMask, - StackItemMaskCoherent*& stackPtr) - { - const NodeRef parent = cur; - size_t mask = movemask(vmask); - assert(mask != 0); - const BaseNode* node = cur.baseNode(); - - /*! one child is hit, continue with that child */ - size_t r = bscf(mask); - cur = node->child(r); - BVHN<N>::prefetch(cur,types); - m_trav_active = tMask[r]; - - /* simple in order sequence */ - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - stackPtr->mask = m_trav_active; - stackPtr->parent = parent; - stackPtr->child = cur; - stackPtr++; - - for (; ;) - { - r = bscf(mask); - cur = node->child(r); - BVHN<N>::prefetch(cur,types); - m_trav_active = tMask[r]; - assert(cur != BVH::emptyNode); - if (likely(mask == 0)) return; - stackPtr->mask = m_trav_active; - stackPtr->parent = parent; - stackPtr->child = cur; - stackPtr++; - } - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h deleted file mode 100644 index a978c0c459..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "bvh.h" - -namespace embree -{ - namespace isa - { - struct NearFarPrecalculations - { - size_t nearX, nearY, nearZ; - size_t farX, farY, farZ; - - __forceinline NearFarPrecalculations() {} - - __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N) - { - const size_t size = sizeof(float)*N; - nearX = (dir.x < 0.0f) ? 1*size : 0*size; - nearY = (dir.y < 0.0f) ? 3*size : 2*size; - nearZ = (dir.z < 0.0f) ? 5*size : 4*size; - farX = nearX ^ size; - farY = nearY ^ size; - farZ = nearZ ^ size; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h deleted file mode 100644 index aa0d4ba4d7..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h +++ /dev/null @@ -1,1788 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "node_intersector.h" - -#if defined(__AVX2__) -#define __FMA_X4__ -#endif - -#if defined(__aarch64__) -#define __FMA_X4__ -#endif - - -namespace embree -{ - namespace isa - { - ////////////////////////////////////////////////////////////////////////////////////// - // Ray structure used in single-ray traversal - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx, bool robust> - struct TravRayBase; - - /* Base (without tnear and tfar) */ - template<int N, int Nx> - struct TravRayBase<N,Nx,false> - { - __forceinline TravRayBase() {} - - __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) - : org_xyz(ray_org), dir_xyz(ray_dir) - { - const Vec3fa ray_rdir = rcp_safe(ray_dir); - org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); - dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); - rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z); -#if defined(__FMA_X4__) - const Vec3fa ray_org_rdir = ray_org*ray_rdir; -#if !defined(__aarch64__) - org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); -#else - //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd - //x86 will use msub - neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z); -#endif -#endif - nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); - nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); - nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); - farX = nearX ^ sizeof(vfloat<N>); - farY = nearY ^ sizeof(vfloat<N>); - farZ = nearZ ^ sizeof(vfloat<N>); - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif - - } - - template<int K> - __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - size_t flip = sizeof(vfloat<N>)) - { - org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); - dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); - rdir = Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); -#if defined(__FMA_X4__) -#if !defined(__aarch64__) - org_rdir = org*rdir; -#else - neg_org_rdir = -(org*rdir); -#endif -#endif - nearX = nearXYZ.x[k]; - nearY = nearXYZ.y[k]; - nearZ = nearXYZ.z[k]; - farX = nearX ^ flip; - farY = nearY ^ flip; - farZ = nearZ ^ flip; - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif - } - - Vec3fa org_xyz, dir_xyz; - Vec3vf<Nx> org, dir, rdir; -#if defined(__FMA_X4__) -#if !defined(__aarch64__) - Vec3vf<Nx> org_rdir; -#else - //aarch64 version are keeping negation of the org_rdir and use madd - //x86 uses msub - Vec3vf<Nx> neg_org_rdir; -#endif -#endif -#if defined(__AVX512ER__) // KNL+ - vint16 permX, permY, permZ; -#endif - - size_t nearX, nearY, nearZ; - size_t farX, farY, farZ; - }; - - /* Base (without tnear and tfar) */ - template<int N, int Nx> - struct TravRayBase<N,Nx,true> - { - __forceinline TravRayBase() {} - - __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) - : org_xyz(ray_org), dir_xyz(ray_dir) - { - const float round_down = 1.0f-3.0f*float(ulp); - const float round_up = 1.0f+3.0f*float(ulp); - const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir); - const Vec3fa ray_rdir_near = round_down*ray_rdir; - const Vec3fa ray_rdir_far = round_up *ray_rdir; - org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); - dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); - rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z); - rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z); - nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); - nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); - nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); - farX = nearX ^ sizeof(vfloat<N>); - farY = nearY ^ sizeof(vfloat<N>); - farZ = nearZ ^ sizeof(vfloat<N>); - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif - } - - template<int K> - __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - size_t flip = sizeof(vfloat<N>)) - { - const vfloat<Nx> round_down = 1.0f-3.0f*float(ulp); - const vfloat<Nx> round_up = 1.0f+3.0f*float(ulp); - org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); - dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); - rdir_near = round_down*Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); - rdir_far = round_up *Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); - - nearX = nearXYZ.x[k]; - nearY = nearXYZ.y[k]; - nearZ = nearXYZ.z[k]; - farX = nearX ^ flip; - farY = nearY ^ flip; - farZ = nearZ ^ flip; - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif - } - - Vec3fa org_xyz, dir_xyz; - Vec3vf<Nx> org, dir, rdir_near, rdir_far; -#if defined(__AVX512ER__) // KNL+ - vint16 permX, permY, permZ; -#endif - - size_t nearX, nearY, nearZ; - size_t farX, farY, farZ; - }; - - /* Full (with tnear and tfar) */ - template<int N, int Nx, bool robust> - struct TravRay : TravRayBase<N,Nx,robust> - { - __forceinline TravRay() {} - - __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar) - : TravRayBase<N,Nx,robust>(ray_org, ray_dir), - tnear(ray_tnear), tfar(ray_tfar) {} - - template<int K> - __forceinline TravRay(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - float ray_tnear, float ray_tfar, - size_t flip = sizeof(vfloat<N>)) - : TravRayBase<N,Nx,robust>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip), - tnear(ray_tnear), tfar(ray_tfar) {} - - vfloat<Nx> tnear; - vfloat<Nx> tfar; - }; - - ////////////////////////////////////////////////////////////////////////////////////// - // Point Query structure used in single-ray traversal - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - struct TravPointQuery - { - __forceinline TravPointQuery() {} - - __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad) - { - org = Vec3vf<N>(query_org.x, query_org.y, query_org.z); - rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z); - } - - __forceinline vfloat<N> const& tfar() const { - return rad.x; - } - - Vec3vf<N> org, rad; - }; - - ////////////////////////////////////////////////////////////////////////////////////// - // point query - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - __forceinline size_t pointQuerySphereDistAndMask( - const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, - vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) - { - const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; - const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; - const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; - dist = vX * vX + vY * vY + vZ * vZ; - const vbool<N> vmask = dist <= query.tfar()*query.tfar(); - const vbool<N> valid = minX <= maxX; - return movemask(vmask) & movemask(valid); - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); - const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); - const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); - const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); - const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); - const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); - return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); - const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); - const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); - const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); - const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); - const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); - const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); - const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); - const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); - const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); - const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); - const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); - return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); - } - - template<int N> - __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - size_t mask = pointQueryNodeSphere(node, query, time, dist); - - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); - mask &= movemask(vmask); - } - - return mask; - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - const vfloat<N> start_x(node->start.x); - const vfloat<N> scale_x(node->scale.x); - const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); - const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); - const vfloat<N> start_y(node->start.y); - const vfloat<N> scale_y(node->scale.y); - const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); - const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); - const vfloat<N> start_z(node->start.z); - const vfloat<N> scale_z(node->scale.z); - const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); - const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); - return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const vfloat<N> minX = node->dequantizeLowerX(time); - const vfloat<N> maxX = node->dequantizeUpperX(time); - const vfloat<N> minY = node->dequantizeLowerY(time); - const vfloat<N> maxY = node->dequantizeUpperY(time); - const vfloat<N> minZ = node->dequantizeLowerZ(time); - const vfloat<N> maxZ = node->dequantizeUpperZ(time); - return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - // TODO: point query - implement - const vbool<N> vmask = vbool<N>(true); - const size_t mask = movemask(vmask) & ((1<<N)-1); - dist = vfloat<N>(0.0f); - return mask; - } - - template<int N> - __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - // TODO: point query - implement - const vbool<N> vmask = vbool<N>(true); - const size_t mask = movemask(vmask) & ((1<<N)-1); - dist = vfloat<N>(0.0f); - return mask; - } - - template<int N> - __forceinline size_t pointQueryAABBDistAndMask( - const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, - vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) - { - const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; - const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; - const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; - dist = vX * vX + vY * vY + vZ * vZ; - const vbool<N> valid = minX <= maxX; - const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) | - (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) | - (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z)); - return movemask(vmask) & movemask(valid); - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); - const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); - const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); - const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); - const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); - const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); - return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); - const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); - const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); - const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); - const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); - const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); - const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); - const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); - const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); - const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); - const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); - const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); - return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); - } - - template<int N> - __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - size_t mask = pointQueryNodeAABB(node, query, time, dist); - - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); - mask &= movemask(vmask); - } - - return mask; - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat<N> start_x(node->start.x); - const vfloat<N> scale_x(node->scale.x); - const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); - const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); - const vfloat<N> start_y(node->start.y); - const vfloat<N> scale_y(node->scale.y); - const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); - const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); - const vfloat<N> start_z(node->start.z); - const vfloat<N> scale_z(node->scale.z); - const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); - const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); - return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat<N> minX = node->dequantizeLowerX(time); - const vfloat<N> maxX = node->dequantizeUpperX(time); - const vfloat<N> minY = node->dequantizeLowerY(time); - const vfloat<N> maxY = node->dequantizeUpperY(time); - const vfloat<N> minZ = node->dequantizeLowerZ(time); - const vfloat<N> maxZ = node->dequantizeUpperZ(time); - return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - // TODO: point query - implement - const vbool<N> vmask = vbool<N>(true); - const size_t mask = movemask(vmask) & ((1<<N)-1); - dist = vfloat<N>(0.0f); - return mask; - } - - template<int N> - __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - // TODO: point query - implement - const vbool<N> vmask = vbool<N>(true); - const size_t mask = movemask(vmask) & ((1<<N)-1); - dist = vfloat<N>(0.0f); - return mask; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); - - template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::AABBNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) - { -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); - const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); - const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); - const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); - const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); - const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; - const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; - const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; - const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; - const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; - const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; -#endif - -#if defined(__aarch64__) - const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear); - const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar); - const vbool4 vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW - const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = asInt(tNear) > asInt(tFar); - const size_t mask = movemask(vmask) ^ ((1<<4)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#else - const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); -#endif - dist = tNear; - return mask; - } - -#if defined(__AVX__) - - template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::AABBNode* node, const TravRay<8,8,false>& ray, vfloat8& dist) - { -#if defined(__AVX2__) -#if defined(__aarch64__) - const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); - const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); - const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); - const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); - const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); - const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); -#endif - -#else - const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; - const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; - const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; - const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; - const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; - const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; -#endif - -#if defined(__AVX2__) && !defined(__AVX512F__) // HSW - const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = asInt(tNear) > asInt(tFar); - const size_t mask = movemask(vmask) ^ ((1<<8)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#else - const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); -#endif - dist = tNear; - return mask; - } - -#endif - -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::AABBNode* node, const TravRay<4,16,false>& ray, vfloat16& dist) - { - const vfloat16 tNearX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::AABBNode* node, const TravRay<8,16,false>& ray, vfloat16& dist) - { - const vllong8 invalid((size_t)BVH8::emptyNode); - const vboold8 m_valid(invalid != vllong8::loadu(node->children)); - const vfloat16 bminmaxX = permute(vfloat16::load((const float*)&node->lower_x), ray.permX); - const vfloat16 bminmaxY = permute(vfloat16::load((const float*)&node->lower_y), ray.permY); - const vfloat16 bminmaxZ = permute(vfloat16::load((const float*)&node->lower_z), ray.permZ); - const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(vboolf16(m_valid),tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - -#endif - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx> - __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist) - { - const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; - const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; - const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; - const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; - const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; - const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; - const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool<N> vmask = tNear <= tFar; - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNodeRobust<4,16>(const typename BVHN<4>::AABBNode* node, const TravRay<4,16,true>& ray, vfloat<16>& dist) - { - const vfloat16 tNearX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le((1 << 4)-1,tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNodeRobust<8,16>(const typename BVHN<8>::AABBNode* node, const TravRay<8,16,true>& ray, vfloat<16>& dist) - { - const vfloat16 tNearX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le((1 << 8)-1,tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - -#endif - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) - { - const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); - const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); - const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); - const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); - const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); - const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); - const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; - const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; - const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; - const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; - const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; - const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; -#endif -#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW - const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool<N> vmask = asInt(tNear) > asInt(tFar); - const size_t mask = movemask(vmask) ^ ((1<<N)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool<N> vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#else - const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); - const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); - const vbool<N> vmask = tNear <= tFar; - const size_t mask = movemask(vmask); -#endif - dist = tNear; - return mask; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist) - { - const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); - const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); - const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); - const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; - const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; - const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; - const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); - const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); - const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); - const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); - const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; - const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; - const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; - const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); - const size_t mask = movemask(tNear <= tFar); - dist = tNear; - return mask; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNodeMB4D intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - - const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); - const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); - const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); - const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); - const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); - const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); -#if defined (__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); - const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; - const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; - const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; - const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; - const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; - const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; -#endif -#if defined(__FMA_X4__) && !defined(__AVX512F__) - const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear)); - const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar )); -#else - const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); - const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); -#endif - vbool<N> vmask = tNear <= tFar; - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - vmask &= (node1->lower_t <= time) & (time < node1->upper_t); - } - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNodeMB4D intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N> - __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - - const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); - const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); - const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); - const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; - const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; - const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; - const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); - const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); - const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); - const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); - const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; - const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; - const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; - const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); - vbool<N> vmask = tNear <= tFar; - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - vmask &= (node1->lower_t <= time) & (time < node1->upper_t); - } - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast QuantizedBaseNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); - - template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat4 start_x(node->start.x); - const vfloat4 scale_x(node->scale.x); - const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); - const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x); - const vfloat4 start_y(node->start.y); - const vfloat4 scale_y(node->scale.y); - const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y); - const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y); - const vfloat4 start_z(node->start.z); - const vfloat4 scale_z(node->scale.z); - const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z); - const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z); - -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x; - const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y; - const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; - const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x; - const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y; - const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; -#endif - -#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW - const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = asInt(tNear) > asInt(tFar); - const size_t mask = movemask(vmask) ^ ((1<<4)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#else - const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); -#endif - dist = tNear; - return mask & mvalid; - } - - template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,true>& ray, vfloat4& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat4 start_x(node->start.x); - const vfloat4 scale_x(node->scale.x); - const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); - const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x); - const vfloat4 start_y(node->start.y); - const vfloat4 scale_y(node->scale.y); - const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y); - const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y); - const vfloat4 start_z(node->start.z); - const vfloat4 scale_z(node->scale.z); - const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z); - const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z); - - const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool4 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); - dist = tNear; - return mask & mvalid; - } - - -#if defined(__AVX__) - - template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,false>& ray, vfloat8& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat8 start_x(node->start.x); - const vfloat8 scale_x(node->scale.x); - const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); - const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); - const vfloat8 start_y(node->start.y); - const vfloat8 scale_y(node->scale.y); - const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); - const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); - const vfloat8 start_z(node->start.z); - const vfloat8 scale_z(node->scale.z); - const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); - const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); - -#if defined(__AVX2__) -#if defined(__aarch64__) - const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x; - const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y; - const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; - const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x; - const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y; - const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; -#endif - -#if defined(__AVX2__) && !defined(__AVX512F__) // HSW - const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = asInt(tNear) > asInt(tFar); - const size_t mask = movemask(vmask) ^ ((1<<8)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = asInt(tNear) <= asInt(tFar); - const size_t mask = movemask(vmask); -#else - const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); -#endif - dist = tNear; - return mask & mvalid; - } - - template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,true>& ray, vfloat8& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat8 start_x(node->start.x); - const vfloat8 scale_x(node->scale.x); - const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); - const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); - const vfloat8 start_y(node->start.y); - const vfloat8 scale_y(node->scale.y); - const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); - const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); - const vfloat8 start_z(node->start.z); - const vfloat8 scale_z(node->scale.z); - const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); - const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); - - const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool8 vmask = tNear <= tFar; - const size_t mask = movemask(vmask); - - dist = tNear; - return mask & mvalid; - } - - -#endif - -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,false>& ray, vfloat16& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 start_x(node->start.x); - const vfloat16 scale_x(node->scale.x); - const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); - const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x); - const vfloat16 start_y(node->start.y); - const vfloat16 scale_y(node->scale.y); - const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y); - const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y); - const vfloat16 start_z(node->start.z); - const vfloat16 scale_z(node->scale.z); - const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z); - const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z); - - const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask) & mvalid; - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,true>& ray, vfloat16& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 start_x(node->start.x); - const vfloat16 scale_x(node->scale.x); - const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); - const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x); - const vfloat16 start_y(node->start.y); - const vfloat16 scale_y(node->scale.y); - const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y); - const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y); - const vfloat16 start_z(node->start.z); - const vfloat16 scale_z(node->scale.z); - const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z); - const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z); - - const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask) & mvalid; - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,false>& ray, vfloat16& dist) - { - const vbool16 m_valid(node->validMask16()); - const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); - const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY); - const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ); - const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,true>& ray, vfloat16& dist) - { - const vbool16 m_valid(node->validMask16()); - const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); - const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY); - const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ); - const vfloat16 tNearFarX = (bminmaxX - ray.org.x) * ray.rdir_far.x; // FIXME: this is not conservative !!!!!!!!! - const vfloat16 tNearFarY = (bminmaxY - ray.org.y) * ray.rdir_far.y; - const vfloat16 tNearFarZ = (bminmaxZ - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - -#endif - - - template<int N, int Nx> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist) - { - const vboolf<N> mvalid = node->validMask(); - const vfloat<N> lower_x = node->dequantizeLowerX(time); - const vfloat<N> upper_x = node->dequantizeUpperX(time); - const vfloat<N> lower_y = node->dequantizeLowerY(time); - const vfloat<N> upper_y = node->dequantizeUpperY(time); - const vfloat<N> lower_z = node->dequantizeLowerZ(time); - const vfloat<N> upper_z = node->dequantizeUpperZ(time); -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif -#else - const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x; - const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y; - const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z; - const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x; - const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y; - const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z; -#endif - - const vfloat<N> tminX = mini(tNearX,tFarX); - const vfloat<N> tmaxX = maxi(tNearX,tFarX); - const vfloat<N> tminY = mini(tNearY,tFarY); - const vfloat<N> tmaxY = maxi(tNearY,tFarY); - const vfloat<N> tminZ = mini(tNearZ,tFarZ); - const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); - const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); - const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); -#else - const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; -#endif - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<int N, int Nx> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist) - { - const vboolf<N> mvalid = node->validMask(); - const vfloat<N> lower_x = node->dequantizeLowerX(time); - const vfloat<N> upper_x = node->dequantizeUpperX(time); - const vfloat<N> lower_y = node->dequantizeLowerY(time); - const vfloat<N> upper_y = node->dequantizeUpperY(time); - const vfloat<N> lower_z = node->dequantizeLowerZ(time); - const vfloat<N> upper_z = node->dequantizeUpperZ(time); - const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat<N> tminX = mini(tNearX,tFarX); - const vfloat<N> tmaxX = maxi(tNearX,tFarX); - const vfloat<N> tminY = mini(tNearY,tFarY); - const vfloat<N> tmaxY = maxi(tNearY,tFarY); - const vfloat<N> tminZ = mini(tNearZ,tFarZ); - const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); - const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); - const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); -#else - const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; -#endif - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - -#if defined(__AVX512ER__) - // for KNL - template<> - __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,false>& ray, const float time, vfloat<4>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 lower_x = node->dequantizeLowerX(time); - const vfloat16 upper_x = node->dequantizeUpperX(time); - const vfloat16 lower_y = node->dequantizeLowerY(time); - const vfloat16 upper_y = node->dequantizeUpperY(time); - const vfloat16 lower_z = node->dequantizeLowerZ(time); - const vfloat16 upper_z = node->dequantizeUpperZ(time); - - const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); - - const vfloat16 tminX = min(tNearX,tFarX); - const vfloat16 tmaxX = max(tNearX,tFarX); - const vfloat16 tminY = min(tNearY,tFarY); - const vfloat16 tmaxY = max(tNearY,tFarY); - const vfloat16 tminZ = min(tNearZ,tFarZ); - const vfloat16 tmaxZ = max(tNearZ,tFarZ); - const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear); - const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar ); - const vbool16 vmask = tNear <= tFar; - const size_t mask = movemask(vmask) & mvalid; - dist = extractN<4,0>(tNear); - return mask; - } - - - // for KNL - template<> - __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,true>& ray, const float time, vfloat<4>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 lower_x = node->dequantizeLowerX(time); - const vfloat16 upper_x = node->dequantizeUpperX(time); - const vfloat16 lower_y = node->dequantizeLowerY(time); - const vfloat16 upper_y = node->dequantizeUpperY(time); - const vfloat16 lower_z = node->dequantizeLowerZ(time); - const vfloat16 upper_z = node->dequantizeUpperZ(time); - - const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat16 tminX = min(tNearX,tFarX); - const vfloat16 tmaxX = max(tNearX,tFarX); - const vfloat16 tminY = min(tNearY,tFarY); - const vfloat16 tmaxY = max(tNearY,tFarY); - const vfloat16 tminZ = min(tNearZ,tFarZ); - const vfloat16 tmaxZ = max(tNearZ,tFarZ); - const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear); - const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar ); - const vbool16 vmask = tNear <= tFar; - const size_t mask = movemask(vmask) & mvalid; - dist = extractN<4,0>(tNear); - return mask; - } - -#endif - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast OBBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,N,robust>& ray, vfloat<N>& dist) - { - const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir); - //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir; - const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir); - const Vec3vf<N> org = xfmPoint(node->naabb,ray.org); - const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir; - const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir; - - const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); - const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); - const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); - const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); - const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); - const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); - vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); - vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); - if (robust) { - tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); - tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); - } - const vbool<N> vmask = tNear <= tFar; - dist = tNear; - return movemask(vmask); - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast OBBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,N,robust>& ray, const float time, vfloat<N>& dist) - { - const AffineSpace3vf<N> xfm = node->space0; - const Vec3vf<N> b0_lower = zero; - const Vec3vf<N> b0_upper = one; - const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time)); - const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time)); - - const BBox3vf<N> bounds(lower,upper); - const Vec3vf<N> dir = xfmVector(xfm,ray.dir); - const Vec3vf<N> rdir = rcp_safe(dir); - const Vec3vf<N> org = xfmPoint(xfm,ray.org); - - const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir; - const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir; - - const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); - const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); - const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); - const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); - const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); - const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); - vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); - vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); - if (robust) { - tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); - tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); - } - const vbool<N> vmask = tNear <= tFar; - dist = tNear; - return movemask(vmask); - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Node intersectors used in point query raversal - ////////////////////////////////////////////////////////////////////////////////////// - - /*! Computes traversal information for N nodes with 1 point query */ - template<int N, int types> - struct BVHNNodePointQuerySphere1; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); - else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist); - else return false; - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); - else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); - else return false; - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); - else mask = pointQueryNodeSphereMB4D(node, query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQuerySphere1<N, BVH_QN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); - return true; - } - }; - - template<int N> - struct BVHNQuantizedBaseNodePointQuerySphere1 - { - static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - return pointQueryNodeSphere(node,query,dist); - } - - static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - return pointQueryNodeSphere(node,query,time,dist); - } - }; - - /*! Computes traversal information for N nodes with 1 point query */ - template<int N, int types> - struct BVHNNodePointQueryAABB1; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); - else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist); - else return false; - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); - else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); - else return false; - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); - else mask = pointQueryNodeAABBMB4D(node, query, time, dist); - return true; - } - }; - - template<int N> - struct BVHNNodePointQueryAABB1<N, BVH_QN1> - { - static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); - return true; - } - }; - - template<int N> - struct BVHNQuantizedBaseNodePointQueryAABB1 - { - static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) - { - return pointQueryNodeAABB(node,query,dist); - } - - static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) - { - return pointQueryNodeAABB(node,query,time,dist); - } - }; - - - ////////////////////////////////////////////////////////////////////////////////////// - // Node intersectors used in ray traversal - ////////////////////////////////////////////////////////////////////////////////////// - - /*! Intersects N nodes with 1 ray */ - template<int N, int Nx, int types, bool robust> - struct BVHNNodeIntersector1; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNode(node.getAABBNode(), ray, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNodeRobust(node.getAABBNode(), ray, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNodeMB4D<N>(node, ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNodeMB4DRobust<N>(node, ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist); - else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); - else return false; - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist); - else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); - else return false; - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); - else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); - else return false; - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); - else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); - else return false; - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); - else mask = intersectNodeMB4D(node, ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); - else mask = intersectNodeMB4DRobust(node, ray, time, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_QN1, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); - return true; - } - }; - - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_QN1, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) - { - if (unlikely(node.isLeaf())) return false; - mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); - return true; - } - }; - - /*! Intersects N nodes with K rays */ - template<int N, int Nx, bool robust> - struct BVHNQuantizedBaseNodeIntersector1; - - template<int N, int Nx> - struct BVHNQuantizedBaseNodeIntersector1<N, Nx, false> - { - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,false>& ray, vfloat<Nx>& dist) - { - return intersectNode(node,ray,dist); - } - - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist) - { - return intersectNode(node,ray,time,dist); - } - - }; - - template<int N, int Nx> - struct BVHNQuantizedBaseNodeIntersector1<N, Nx, true> - { - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist) - { - return intersectNode(node,ray,dist); - } - - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist) - { - return intersectNode(node,ray,time,dist); - } - - }; - - - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h deleted file mode 100644 index 800ac8b478..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "node_intersector.h" - -namespace embree -{ - namespace isa - { - ////////////////////////////////////////////////////////////////////////////////////// - // Frustum structure used in hybrid and stream traversal - ////////////////////////////////////////////////////////////////////////////////////// - - /* - Optimized frustum test. We calculate t=(p-org)/dir in ray/box - intersection. We assume the rays are split by octant, thus - dir intervals are either positive or negative in each - dimension. - - Case 1: dir.min >= 0 && dir.max >= 0: - t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min - t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max - - Case 2: dir.min < 0 && dir.max < 0: - t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max - t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min - */ - - template<bool robust> - struct Frustum; - - /* Fast variant */ - template<> - struct Frustum<false> - { - __forceinline Frustum() {} - - template<int K> - __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(valid, org, rdir, ray_tnear, ray_tfar, N); - } - - template<int K> - __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), - reduce_min(select(valid, org.y, pos_inf)), - reduce_min(select(valid, org.z, pos_inf))); - - const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), - reduce_max(select(valid, org.y, neg_inf)), - reduce_max(select(valid, org.z, neg_inf))); - - const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), - reduce_min(select(valid, rdir.y, pos_inf)), - reduce_min(select(valid, rdir.z, pos_inf))); - - const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), - reduce_max(select(valid, rdir.y, neg_inf)), - reduce_max(select(valid, rdir.z, neg_inf))); - - const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf))); - const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf))); - - init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); - } - - __forceinline void init(const Vec3fa& reduced_min_org, - const Vec3fa& reduced_max_org, - const Vec3fa& reduced_min_rdir, - const Vec3fa& reduced_max_rdir, - float reduced_min_dist, - float reduced_max_dist, - int N) - { - const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); - - min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); - max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); - -#if defined (__aarch64__) - neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org)); - neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org)); -#else - min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org); - max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org); -#endif - min_dist = reduced_min_dist; - max_dist = reduced_max_dist; - - nf = NearFarPrecalculations(min_rdir, N); - } - - template<int K> - __forceinline void updateMaxDist(const vfloat<K>& ray_tfar) - { - max_dist = reduce_max(ray_tfar); - } - - NearFarPrecalculations nf; - - Vec3fa min_rdir; - Vec3fa max_rdir; - -#if defined (__aarch64__) - Vec3fa neg_min_org_rdir; - Vec3fa neg_max_org_rdir; -#else - Vec3fa min_org_rdir; - Vec3fa max_org_rdir; -#endif - float min_dist; - float max_dist; - }; - - typedef Frustum<false> FrustumFast; - - /* Robust variant */ - template<> - struct Frustum<true> - { - __forceinline Frustum() {} - - template<int K> - __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(valid, org, rdir, ray_tnear, ray_tfar, N); - } - - template<int K> - __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), - reduce_min(select(valid, org.y, pos_inf)), - reduce_min(select(valid, org.z, pos_inf))); - - const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), - reduce_max(select(valid, org.y, neg_inf)), - reduce_max(select(valid, org.z, neg_inf))); - - const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), - reduce_min(select(valid, rdir.y, pos_inf)), - reduce_min(select(valid, rdir.z, pos_inf))); - - const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), - reduce_max(select(valid, rdir.y, neg_inf)), - reduce_max(select(valid, rdir.z, neg_inf))); - - const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf))); - const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf))); - - init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); - } - - __forceinline void init(const Vec3fa& reduced_min_org, - const Vec3fa& reduced_max_org, - const Vec3fa& reduced_min_rdir, - const Vec3fa& reduced_max_rdir, - float reduced_min_dist, - float reduced_max_dist, - int N) - { - const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); - min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); - max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); - - min_org = select(pos_rdir, reduced_max_org, reduced_min_org); - max_org = select(pos_rdir, reduced_min_org, reduced_max_org); - - min_dist = reduced_min_dist; - max_dist = reduced_max_dist; - - nf = NearFarPrecalculations(min_rdir, N); - } - - template<int K> - __forceinline void updateMaxDist(const vfloat<K>& ray_tfar) - { - max_dist = reduce_max(ray_tfar); - } - - NearFarPrecalculations nf; - - Vec3fa min_rdir; - Vec3fa max_rdir; - - Vec3fa min_org; - Vec3fa max_org; - - float min_dist; - float max_dist; - }; - - typedef Frustum<true> FrustumRobust; - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx> - __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, - const FrustumFast& frustum, vfloat<Nx>& dist) - { - const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); - const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); - const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); - const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); - const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); - const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); - -#if defined (__aarch64__) - const vfloat<Nx> fminX = madd(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.neg_min_org_rdir.x)); - const vfloat<Nx> fminY = madd(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.neg_min_org_rdir.y)); - const vfloat<Nx> fminZ = madd(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.neg_min_org_rdir.z)); - const vfloat<Nx> fmaxX = madd(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.neg_max_org_rdir.x)); - const vfloat<Nx> fmaxY = madd(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.neg_max_org_rdir.y)); - const vfloat<Nx> fmaxZ = madd(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.neg_max_org_rdir.z)); -#else - const vfloat<Nx> fminX = msub(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.min_org_rdir.x)); - const vfloat<Nx> fminY = msub(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.min_org_rdir.y)); - const vfloat<Nx> fminZ = msub(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.min_org_rdir.z)); - const vfloat<Nx> fmaxX = msub(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.max_org_rdir.x)); - const vfloat<Nx> fmaxY = msub(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.max_org_rdir.y)); - const vfloat<Nx> fmaxZ = msub(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.max_org_rdir.z)); -#endif - const vfloat<Nx> fmin = maxi(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist)); - dist = fmin; - const vfloat<Nx> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist)); - const vbool<Nx> vmask_node_hit = fmin <= fmax; - size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); - return m_node; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx> - __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, - const FrustumRobust& frustum, vfloat<Nx>& dist) - { - const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); - const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); - const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); - const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); - const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); - const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); - - const vfloat<Nx> fminX = (bminX - vfloat<Nx>(frustum.min_org.x)) * vfloat<Nx>(frustum.min_rdir.x); - const vfloat<Nx> fminY = (bminY - vfloat<Nx>(frustum.min_org.y)) * vfloat<Nx>(frustum.min_rdir.y); - const vfloat<Nx> fminZ = (bminZ - vfloat<Nx>(frustum.min_org.z)) * vfloat<Nx>(frustum.min_rdir.z); - const vfloat<Nx> fmaxX = (bmaxX - vfloat<Nx>(frustum.max_org.x)) * vfloat<Nx>(frustum.max_rdir.x); - const vfloat<Nx> fmaxY = (bmaxY - vfloat<Nx>(frustum.max_org.y)) * vfloat<Nx>(frustum.max_rdir.y); - const vfloat<Nx> fmaxZ = (bmaxZ - vfloat<Nx>(frustum.max_org.z)) * vfloat<Nx>(frustum.max_rdir.z); - - const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 - const float round_up = 1.0f+2.0f*float(ulp); - const vfloat<Nx> fmin = max(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist)); - dist = fmin; - const vfloat<Nx> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist)); - const vbool<Nx> vmask_node_hit = (round_down*fmin <= round_up*fmax); - size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); - return m_node; - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h deleted file mode 100644 index 0543e56f8e..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h +++ /dev/null @@ -1,843 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "node_intersector.h" - -namespace embree -{ - namespace isa - { - ////////////////////////////////////////////////////////////////////////////////////// - // Ray packet structure used in hybrid traversal - ////////////////////////////////////////////////////////////////////////////////////// - - template<int K, bool robust> - struct TravRayK; - - /* Fast variant */ - template<int K> - struct TravRayK<K, false> - { - __forceinline TravRayK() {} - - __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) - { - init(ray_org, ray_dir, N); - } - - __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(ray_org, ray_dir, N); - tnear = ray_tnear; - tfar = ray_tfar; - } - - __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) - { - org = ray_org; - dir = ray_dir; - rdir = rcp_safe(ray_dir); -#if defined(__aarch64__) - neg_org_rdir = -(org * rdir); -#elif defined(__AVX2__) - org_rdir = org * rdir; -#endif - if (N) - { - const int size = sizeof(float)*N; - nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); - nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); - nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); - } - } - - Vec3vf<K> org; - Vec3vf<K> dir; - Vec3vf<K> rdir; -#if defined(__aarch64__) - Vec3vf<K> neg_org_rdir; -#elif defined(__AVX2__) - Vec3vf<K> org_rdir; -#endif - Vec3vi<K> nearXYZ; - vfloat<K> tnear; - vfloat<K> tfar; - }; - - template<int K> - using TravRayKFast = TravRayK<K, false>; - - /* Robust variant */ - template<int K> - struct TravRayK<K, true> - { - __forceinline TravRayK() {} - - __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) - { - init(ray_org, ray_dir, N); - } - - __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(ray_org, ray_dir, N); - tnear = ray_tnear; - tfar = ray_tfar; - } - - __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) - { - org = ray_org; - dir = ray_dir; - rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); - - if (N) - { - const int size = sizeof(float)*N; - nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); - nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); - nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); - } - } - - Vec3vf<K> org; - Vec3vf<K> dir; - Vec3vf<K> rdir; - Vec3vi<K> nearXYZ; - vfloat<K> tnear; - vfloat<K> tfar; - }; - - template<int K> - using TravRayKRobust = TravRayK<K, true>; - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i, - const TravRayKFast<K>& ray, vfloat<K>& dist) - - { -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) - const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); - const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z); - #else - const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; - #endif - - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - if (K == 16) - { - /* use mixed float/int min/max */ - const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); - dist = lnearP; - return lhit; - } - else - #endif - { - const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); - #else - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - #endif - dist = lnearP; - return lhit; - } - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i, - const TravRayKRobust<K>& ray, vfloat<K>& dist) - { - // FIXME: use per instruction rounding for AVX512 - const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; - const float round_up = 1.0f+3.0f*float(ulp); - const float round_down = 1.0f-3.0f*float(ulp); - const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); - const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); - const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); - const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); - const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); - const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); - -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) - const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); -#else - const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; -#endif - -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - if (K == 16) - { - /* use mixed float/int min/max */ - const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); - dist = lnearP; - return lhit; - } - else -#endif - { - const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); -#else - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); -#endif - dist = lnearP; - return lhit; - } - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); - const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); - const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); - const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); - const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); - const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); - - const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; - - const float round_up = 1.0f+3.0f*float(ulp); - const float round_down = 1.0f-3.0f*float(ulp); - -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - if (K == 16) - { - const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - else -#endif - { - const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNodeMB4D intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - - const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); - const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); - const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); - const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); - const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); - const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); - -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) - const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); -#else - const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; -#endif - - const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); - vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); - } - dist = lnearP; - return lhit; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNodeMB4D intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); - - const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); - const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); - const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); - const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); - const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); - const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); - - const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; - - const float round_up = 1.0f+3.0f*float(ulp); - const float round_down = 1.0f-3.0f*float(ulp); - const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); - vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - - if (unlikely(ref.isAABBNodeMB4D())) { - const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; - lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); - } - dist = lnearP; - return lhit; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast OBBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K, bool robust> - __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i, - const TravRayK<K,robust>& ray, vfloat<K>& dist) - { - const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]), - Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]), - Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]), - Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i])); - - const Vec3vf<K> dir = xfmVector(naabb, ray.dir); - const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1? - const Vec3vf<K> org = xfmPoint(naabb, ray.org); - - const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir; - const vfloat<K> lclipMinY = org.y * nrdir.y; - const vfloat<K> lclipMinZ = org.z * nrdir.z; - const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir; - const vfloat<K> lclipMaxY = lclipMinY - nrdir.y; - const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z; - - vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - if (robust) { - lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); - lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); - } - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast OBBNodeMB intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K, bool robust> - __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i, - const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]), - Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]), - Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]), - Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i])); - - const Vec3vf<K> b0_lower = zero; - const Vec3vf<K> b0_upper = one; - const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]); - const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]); - const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time); - const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time); - - const Vec3vf<K> dir = xfmVector(xfm, ray.dir); - const Vec3vf<K> rdir = rcp_safe(dir); - const Vec3vf<K> org = xfmPoint(xfm, ray.org); - - const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x; - const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y; - const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z; - const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x; - const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y; - const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z; - - vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - if (robust) { - lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); - lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); - } - - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - - - ////////////////////////////////////////////////////////////////////////////////////// - // QuantizedBaseNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int K> - __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, - const TravRayK<K,false>& ray, vfloat<K>& dist) - - { - assert(movemask(node->validMask()) & ((size_t)1 << i)); - const vfloat<N> lower_x = node->dequantizeLowerX(); - const vfloat<N> upper_x = node->dequantizeUpperX(); - const vfloat<N> lower_y = node->dequantizeLowerY(); - const vfloat<N> upper_y = node->dequantizeUpperY(); - const vfloat<N> lower_z = node->dequantizeLowerZ(); - const vfloat<N> upper_z = node->dequantizeUpperZ(); - - #if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); - #elif defined(__AVX2__) - const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); - const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z); - #else - const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; - #endif - - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - if (K == 16) - { - /* use mixed float/int min/max */ - const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); - dist = lnearP; - return lhit; - } - else - #endif - { - const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX - const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); - #else - const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); - #endif - dist = lnearP; - return lhit; - } - } - - template<int N, int K> - __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, - const TravRayK<K,true>& ray, vfloat<K>& dist) - - { - assert(movemask(node->validMask()) & ((size_t)1 << i)); - const vfloat<N> lower_x = node->dequantizeLowerX(); - const vfloat<N> upper_x = node->dequantizeUpperX(); - const vfloat<N> lower_y = node->dequantizeLowerY(); - const vfloat<N> upper_y = node->dequantizeUpperY(); - const vfloat<N> lower_z = node->dequantizeLowerZ(); - const vfloat<N> upper_z = node->dequantizeUpperZ(); - - const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; - - const float round_up = 1.0f+3.0f*float(ulp); - const float round_down = 1.0f-3.0f*float(ulp); - - const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - template<int N, int K> - __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, - const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) - - { - assert(movemask(node->validMask()) & ((size_t)1 << i)); - - const vfloat<K> lower_x = node->dequantizeLowerX(i,time); - const vfloat<K> upper_x = node->dequantizeUpperX(i,time); - const vfloat<K> lower_y = node->dequantizeLowerY(i,time); - const vfloat<K> upper_y = node->dequantizeUpperY(i,time); - const vfloat<K> lower_z = node->dequantizeLowerZ(i,time); - const vfloat<K> upper_z = node->dequantizeUpperZ(i,time); - -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) - const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#else - const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; - #endif - const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - - template<int N, int K> - __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, - const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) - - { - assert(movemask(node->validMask()) & ((size_t)1 << i)); - - const vfloat<K> lower_x = node->dequantizeLowerX(i,time); - const vfloat<K> upper_x = node->dequantizeUpperX(i,time); - const vfloat<K> lower_y = node->dequantizeLowerY(i,time); - const vfloat<K> upper_y = node->dequantizeUpperY(i,time); - const vfloat<K> lower_z = node->dequantizeLowerZ(i,time); - const vfloat<K> upper_z = node->dequantizeUpperZ(i,time); - - const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; - const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; - const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; - const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; - - const float round_up = 1.0f+3.0f*float(ulp); - const float round_down = 1.0f-3.0f*float(ulp); - - const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); - const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); - const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); - dist = lnearP; - return lhit; - } - - - ////////////////////////////////////////////////////////////////////////////////////// - // Node intersectors used in hybrid traversal - ////////////////////////////////////////////////////////////////////////////////////// - - /*! Intersects N nodes with K rays */ - template<int N, int K, int types, bool robust> - struct BVHNNodeIntersectorK; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN1, false> - { - /* vmask is both an input and an output parameter! Its initial value should be the parent node - hit mask, which is used for correctly computing the current hit mask. The parent hit mask - is actually required only for motion blur node intersections (because different rays may - have different times), so for regular nodes vmask is simply overwritten. */ - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN1, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); - else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); - else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); - else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); - else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { - vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); - } else /*if (unlikely(node.isOBBNodeMB()))*/ { - assert(node.isOBBNodeMB()); - vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); - } - return true; - } - }; - - template<int N, int K> - struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true> - { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, - const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) - { - if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { - vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); - } else /*if (unlikely(node.isOBBNodeMB()))*/ { - assert(node.isOBBNodeMB()); - vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); - } - return true; - } - }; - - - /*! Intersects N nodes with K rays */ - template<int N, int K, bool robust> - struct BVHNQuantizedBaseNodeIntersectorK; - - template<int N, int K> - struct BVHNQuantizedBaseNodeIntersectorK<N, K, false> - { - static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, - const TravRayK<K,false>& ray, vfloat<K>& dist) - { - return intersectQuantizedNodeK<N,K>(node,i,ray,dist); - } - - static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, - const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); - } - - }; - - template<int N, int K> - struct BVHNQuantizedBaseNodeIntersectorK<N, K, true> - { - static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, - const TravRayK<K,true>& ray, vfloat<K>& dist) - { - return intersectQuantizedNodeK<N,K>(node,i,ray,dist); - } - - static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, - const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) - { - return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); - } - }; - - - } -} diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h deleted file mode 100644 index f379b57aea..0000000000 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "node_intersector.h" - -namespace embree -{ - namespace isa - { - ////////////////////////////////////////////////////////////////////////////////////// - // Ray packet structure used in stream traversal - ////////////////////////////////////////////////////////////////////////////////////// - - template<int K, bool robust> - struct TravRayKStream; - - /* Fast variant */ - template<int K> - struct TravRayKStream<K, false> - { - __forceinline TravRayKStream() {} - - __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar) - { - init(ray_org, ray_dir); - tnear = ray_tnear; - tfar = ray_tfar; - } - - __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir) - { - rdir = rcp_safe(ray_dir); -#if defined(__aarch64__) - neg_org_rdir = -(ray_org * rdir); -#else - org_rdir = ray_org * rdir; -#endif - } - - Vec3vf<K> rdir; -#if defined(__aarch64__) - Vec3vf<K> neg_org_rdir; -#else - Vec3vf<K> org_rdir; -#endif - vfloat<K> tnear; - vfloat<K> tfar; - }; - - template<int K> - using TravRayKStreamFast = TravRayKStream<K, false>; - - /* Robust variant */ - template<int K> - struct TravRayKStream<K, true> - { - __forceinline TravRayKStream() {} - - __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar) - { - init(ray_org, ray_dir); - tnear = ray_tnear; - tfar = ray_tfar; - } - - __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir) - { - rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); - org = ray_org; - } - - Vec3vf<K> rdir; - Vec3vf<K> org; - vfloat<K> tnear; - vfloat<K> tfar; - }; - - template<int K> - using TravRayKStreamRobust = TravRayKStream<K, true>; - - ////////////////////////////////////////////////////////////////////////////////////// - // Fast AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx, int K> - __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, - const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf) - { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - -#if defined (__aarch64__) - const vfloat<Nx> rminX = madd(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k])); - const vfloat<Nx> rminY = madd(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k])); - const vfloat<Nx> rminZ = madd(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k])); - const vfloat<Nx> rmaxX = madd(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k])); - const vfloat<Nx> rmaxY = madd(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k])); - const vfloat<Nx> rmaxZ = madd(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k])); -#else - const vfloat<Nx> rminX = msub(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k])); - const vfloat<Nx> rminY = msub(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k])); - const vfloat<Nx> rminZ = msub(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k])); - const vfloat<Nx> rmaxX = msub(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k])); - const vfloat<Nx> rmaxY = msub(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k])); - const vfloat<Nx> rmaxZ = msub(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k])); -#endif - const vfloat<Nx> rmin = maxi(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k])); - const vfloat<Nx> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k])); - - const vbool<Nx> vmask_first_hit = rmin <= rmax; - - return movemask(vmask_first_hit) & (((size_t)1 << N)-1); - } - - template<int N, int K> - __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i, - const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf) - { - char* ptr = (char*)&node->lower_x + i*sizeof(float); - const vfloat<K> bminX = *(const float*)(ptr + nf.nearX); - const vfloat<K> bminY = *(const float*)(ptr + nf.nearY); - const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ); - const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX); - const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY); - const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ); - -#if defined (__aarch64__) - const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z); -#else - const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z); - const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x); - const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y); - const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z); -#endif - - const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear); - const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar); - - const vbool<K> vmask_first_hit = rmin <= rmax; - - return movemask(vmask_first_hit); - } - - ////////////////////////////////////////////////////////////////////////////////////// - // Robust AABBNode intersection - ////////////////////////////////////////////////////////////////////////////////////// - - template<int N, int Nx, int K> - __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, - const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf) - { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - - const vfloat<Nx> rminX = (bminX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]); - const vfloat<Nx> rminY = (bminY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]); - const vfloat<Nx> rminZ = (bminZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]); - const vfloat<Nx> rmaxX = (bmaxX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]); - const vfloat<Nx> rmaxY = (bmaxY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]); - const vfloat<Nx> rmaxZ = (bmaxZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]); - const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 - const vfloat<Nx> rmin = max(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k])); - const vfloat<Nx> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k])); - - const vbool<Nx> vmask_first_hit = rmin <= rmax; - - return movemask(vmask_first_hit) & (((size_t)1 << N)-1); - } - - template<int N, int K> - __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i, - const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf) - { - char *ptr = (char*)&node->lower_x + i*sizeof(float); - const vfloat<K> bminX = *(const float*)(ptr + nf.nearX); - const vfloat<K> bminY = *(const float*)(ptr + nf.nearY); - const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ); - const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX); - const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY); - const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ); - - const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x; - const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y; - const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z; - const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x; - const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y; - const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z; - - const float round_up = 1.0f+3.0f*float(ulp); - const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear)); - const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar)); - - const vbool<K> vmask_first_hit = rmin <= rmax; - - return movemask(vmask_first_hit); - } - } -} |