summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/bvh
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/bvh')
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh.cpp190
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh.h235
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp1325
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h316
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp1165
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h280
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp60
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h114
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp531
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp640
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp705
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp201
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp377
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h263
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h267
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp375
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h72
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp330
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h37
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp61
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h61
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h295
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h41
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h213
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h247
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h107
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h43
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h98
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h90
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h265
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h242
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp247
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h95
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp127
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h37
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp168
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h285
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h676
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h154
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector.h31
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h1788
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h269
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h843
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h215
45 files changed, 0 insertions, 14202 deletions
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp
deleted file mode 100644
index bd102bd6ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_statistics.h"
-
-namespace embree
-{
- /*! Creates an empty BVH for the given primitive type and scene. The
-  *  AccelData type tag follows the branching factor N (4 -> TY_BVH4,
-  *  8 -> TY_BVH8, anything else -> TY_UNKNOWN). The root starts as the
-  *  empty node and both primitive and vertex counts start at zero. */
- template<int N>
- BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene)
-   : AccelData((N==4) ? AccelData::TY_BVH4 :
-               (N==8) ? AccelData::TY_BVH8 :
-                        AccelData::TY_UNKNOWN),
-     primTy(&primTy),
-     device(scene->device),
-     scene(scene),
-     root(emptyNode),
-     alloc(scene->device,scene->isStaticAccel()),
-     numPrimitives(0),
-     numVertices(0)
- {
- }
-
- /*! Deletes every BVH stored in the objects array. */
- template<int N>
- BVHN<N>::~BVHN ()
- {
-   for (BVHN* object : objects)
-     delete object;
- }
-
- /*! Resets the BVH to the empty node with empty bounds and zero
-  *  primitives, then clears the node allocator. */
- template<int N>
- void BVHN<N>::clear()
- {
-   this->set(BVHN::emptyNode,empty,0);
-   this->alloc.clear();
- }
-
- /*! Stores the root reference, the bounds and the primitive count. */
- template<int N>
- void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives)
- {
-   /* the parameters shadow the members of the same name, hence this-> */
-   this->numPrimitives = numPrimitives;
-   this->bounds = bounds;
-   this->root = root;
- }
-
- /*! Clears the barrier bits of a subtree. The descent stops at the
-  *  first barrier found on each path and at leaves. */
- template<int N>
- void BVHN<N>::clearBarrier(NodeRef& node)
- {
-   /* a barrier ends the descent: remove the mark and stop */
-   if (node.isBarrier()) {
-     node.clearBarrier();
-     return;
-   }
-
-   /* leaves have no children to visit */
-   if (node.isLeaf())
-     return;
-
-   BaseNode* inner = node.baseNode(); // FIXME: flags should be stored in BVH
-   for (size_t i=0; i<N; i++)
-     clearBarrier(inner->child(i));
- }
-
- /*! Lays out the large nodes at the top of the BVH.
-  *
-  *  Starting from the root, the AABB node with the largest area is
-  *  repeatedly replaced by its non-empty children until the candidate
-  *  list holds at least num entries or the largest candidate is not an
-  *  AABB node. Every remaining candidate is marked with a barrier, and
-  *  layoutLargeNodesRecursion then copies the AABB nodes above the
-  *  barriers into memory taken from the cached allocator.
-  *
-  *  The function does nothing unless __X86_64__ or __aarch64__ is defined. */
- template<int N>
- void BVHN<N>::layoutLargeNodes(size_t num)
- {
-#if defined(__X86_64__) || defined(__aarch64__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
-   /* heap entry: node reference plus the area used as heap key */
-   struct NodeArea
-   {
-     __forceinline NodeArea() {}
-
-     /* leaves get an area of -inf so they sort below all inner nodes */
-     __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
-       : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {}
-
-     __forceinline bool operator< (const NodeArea& other) const {
-       return this->A < other.A;
-     }
-
-     NodeRef* node;
-     float A;
-   };
-   std::vector<NodeArea> lst;
-   lst.reserve(num);
-   lst.push_back(NodeArea(root,empty));
-
-   /* the emptiness test keeps pop_heap/back() off an empty vector, which
-    * could otherwise happen if a popped node had only empty children */
-   while (!lst.empty() && lst.size() < num)
-   {
-     std::pop_heap(lst.begin(), lst.end());
-     NodeArea n = lst.back(); lst.pop_back();
-     if (!n.node->isAABBNode()) break;
-     AABBNode* node = n.node->getAABBNode();
-     for (size_t i=0; i<N; i++) {
-       if (node->child(i) == BVHN::emptyNode) continue;
-       lst.push_back(NodeArea(node->child(i),node->bounds(i)));
-       std::push_heap(lst.begin(), lst.end());
-     }
-   }
-
-   /* mark the roots of the subtrees that are left untouched */
-   for (size_t i=0; i<lst.size(); i++)
-     lst[i].node->setBarrier();
-
-   root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
-#endif
- }
-
- /*! Recursive part of layoutLargeNodes. A node carrying a barrier has
-  *  the barrier removed and is returned unchanged; any other non-AABB
-  *  node is returned unchanged as well. An AABB node is copied into a
-  *  freshly allocated node whose children are processed recursively,
-  *  and the reference to the copy is returned. */
- template<int N>
- typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator)
- {
-   /* barrier reached: strip the mark and keep the subtree as it is */
-   if (node.isBarrier()) {
-     node.clearBarrier();
-     return node;
-   }
-
-   /* only AABB nodes are relocated */
-   if (!node.isAABBNode())
-     return node;
-
-   AABBNode* src = node.getAABBNode();
-   AABBNode* dst = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
-   *dst = *src;
-   for (size_t i=0; i<N; i++)
-     dst->child(i) = layoutLargeNodesRecursion(src->child(i),allocator);
-   return encodeNode(dst);
- }
-
- /*! Called by all builders before the build starts. Returns inf for an
-  *  empty builder name. Otherwise prints a start message at verbosity
-  *  level 2 and returns the current time when benchmarking or
-  *  verbosity level 2 is active, else 0.0. */
- template<int N>
- double BVHN<N>::preBuild(const std::string& builderName)
- {
-   if (builderName.empty())
-     return inf;
-
-   if (device->verbosity(2))
-   {
-     Lock<MutexSys> lock(g_printMutex);
-     /* builders with "MBlur" in their name are reported as BVH<N>MB */
-     const char* mblurTag = (builderName.find("MBlur") != std::string::npos) ? "MB" : "";
-     std::cout << "building BVH" << N << mblurTag << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
-   }
-
-   if (device->benchmark || device->verbosity(2))
-     return getSeconds();
-   return 0.0;
- }
-
- /*! Called by all builders after the build ended.
-  *
-  *  t0 is the value returned by preBuild; a value of inf makes this a
-  *  no-op. At verbosity level 2 the build time, throughput, BVH
-  *  statistics and accumulated allocator statistics are printed, and
-  *  at level 3 additionally the allocator blocks. In benchmark mode a
-  *  BENCHMARK_BUILD line is printed. All output is serialized through
-  *  g_printMutex. */
- template<int N>
- void BVHN<N>::postBuild(double t0)
- {
-   if (t0 == double(inf))
-     return;
-
-   double dt = 0.0;
-   if (device->benchmark || device->verbosity(2))
-     dt = getSeconds()-t0;
-
-   /* BVH statistics are gathered lazily and shared by both reports */
-   std::unique_ptr<BVHNStatistics<N>> stat;
-
-   /* print statistics */
-   if (device->verbosity(2))
-   {
-     if (!stat) stat.reset(new BVHNStatistics<N>(this));
-     const size_t usedBytes = alloc.getUsedBytes();
-     Lock<MutexSys> lock(g_printMutex);
-     std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;
-
-     std::cout << stat->str();
-
-     /* allocator statistics of this BVH plus all BVHs in objects */
-     FastAllocator::AllStatistics allocStat(&alloc);
-     for (size_t i=0; i<objects.size(); i++)
-       if (objects[i])
-         allocStat = allocStat + FastAllocator::AllStatistics(&objects[i]->alloc);
-
-     allocStat.print(numPrimitives);
-
-     if (device->verbosity(3))
-     {
-       alloc.print_blocks();
-       for (size_t i=0; i<objects.size(); i++)
-         if (objects[i])
-           objects[i]->alloc.print_blocks();
-     }
-
-     std::cout << std::flush;
-   }
-
-   /* benchmark mode */
-   if (device->benchmark)
-   {
-     if (!stat) stat.reset(new BVHNStatistics<N>(this));
-     Lock<MutexSys> lock(g_printMutex);
-     std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
-   }
- }
-
-#if defined(__AVX__) /* BVH8 is only instantiated when compiling with AVX */
- template class BVHN<8>;
-#endif
-
- /* && binds tighter than ||, the parentheses only make the grouping explicit */
-#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
- template class BVHN<4>;
-#endif
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.h b/thirdparty/embree-aarch64/kernels/bvh/bvh.h
deleted file mode 100644
index 8fdf912e52..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh.h
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-/* include all node types */
-#include "bvh_node_aabb.h"
-#include "bvh_node_aabb_mb.h"
-#include "bvh_node_aabb_mb4d.h"
-#include "bvh_node_obb.h"
-#include "bvh_node_obb_mb.h"
-#include "bvh_node_qaabb.h"
-
-namespace embree
-{
- /*! Flags used to enable specific node types in intersectors. Each
-  *  BVH_FLAG_* value has exactly one bit set, so flags can be OR-ed
-  *  together; the short names below are aliases or such combinations. */
- enum BVHNodeFlags
- {
- BVH_FLAG_ALIGNED_NODE = 0x00001, // bit 0
- BVH_FLAG_ALIGNED_NODE_MB = 0x00010, // bit 4
- BVH_FLAG_UNALIGNED_NODE = 0x00100, // bit 8
- BVH_FLAG_UNALIGNED_NODE_MB = 0x01000, // bit 12
- BVH_FLAG_QUANTIZED_NODE = 0x100000, // bit 20
- BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000, // bit 24
-
- /* short versions */
- BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
- BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
- BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
- BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
- BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
- BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D, // all three *_MB* flags
- BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
- BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
- BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB, // same value as BVH_MB
- BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
- };
-
- /*! Multi BVH with N children. Each node stores the bounding box of
- * its N children as well as N child references. */
- template<int N>
- class BVHN : public AccelData
- {
- ALIGNED_CLASS_(16);
- public:
-
- /*! node reference type and the node types for branching factor N */
- typedef NodeRefPtr<N> NodeRef;
- typedef BaseNode_t<NodeRef,N> BaseNode;
- typedef AABBNode_t<NodeRef,N> AABBNode;
- typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
- typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
- typedef OBBNode_t<NodeRef,N> OBBNode;
- typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
- typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
- typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
- typedef QuantizedNode_t<NodeRef,N> QuantizedNode;
-
- /*! Number of bytes the nodes and primitives are minimally aligned to.*/
- static const size_t byteAlignment = 16;
- static const size_t byteNodeAlignment = 4*N; // 16 bytes for N=4, 32 bytes for N=8
-
- /*! Empty node */
- static const size_t emptyNode = NodeRef::emptyNode;
-
- /*! Invalid node, used as marker in traversal */
- static const size_t invalidNode = NodeRef::invalidNode;
- static const size_t popRay = NodeRef::popRay;
-
- /*! Maximum depth of the BVH. */
- static const size_t maxBuildDepth = 32;
- static const size_t maxBuildDepthLeaf = maxBuildDepth+8; // = 40
- static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder
-
- /*! Maximum number of primitive blocks in a leaf. */
- static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;
-
- public:
-
- /*! Builder interface to create allocator */
- struct CreateAlloc : public FastAllocator::Create {
- __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
- };
-
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- public:
-
- /*! BVHN constructor; creates an empty BVH for the given primitive type and scene. */
- BVHN (const PrimitiveType& primTy, Scene* scene);
-
- /*! BVHN destruction */
- ~BVHN ();
-
- /*! clears the acceleration structure */
- void clear();
-
- /*! sets BVH members after build */
- void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);
-
- /*! Clears the barrier bits of a subtree. */
- void clearBarrier(NodeRef& node);
-
- /*! lays out num large nodes of the BVH */
- void layoutLargeNodes(size_t num);
- NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);
-
- /*! called by all builders before build starts */
- double preBuild(const std::string& builderName);
-
- /*! called by all builders after build ended */
- void postBuild(double t0);
-
- /*! allocator class; a functor that allocates the requested number of
-  *  bytes from the BVH's allocator through its thread local allocator */
- struct Allocator {
- BVHN* bvh;
- Allocator (BVHN* bvh) : bvh(bvh) {}
- __forceinline void* operator() (size_t bytes) const {
- return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
- }
- };
-
- /*! post build cleanup */
- void cleanup() {
- alloc.cleanup();
- }
-
- public:
-
- /*! Encodes a node */
- static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
- static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }
-
- public:
-
- /*! Prefetches the node this reference points to. The node memory is
-  *  prefetched in 64 byte steps starting at ref.ptr. Unless types equals
-  *  BVH_FLAG_QUANTIZED_NODE, two blocks are always prefetched and two
-  *  more if N >= 8 or types > BVH_FLAG_ALIGNED_NODE; with __AVX512PF__
-  *  four further blocks follow if both conditions hold. For
-  *  BVH_FLAG_QUANTIZED_NODE three blocks are prefetched. */
- __forceinline static void prefetch(const NodeRef ref, int types=0)
- {
-#if defined(__AVX512PF__) // MIC
- if (types != BVH_FLAG_QUANTIZED_NODE) {
- prefetchL2(((char*)ref.ptr)+0*64);
- prefetchL2(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
- prefetchL2(((char*)ref.ptr)+2*64);
- prefetchL2(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
- /* KNL still needs L2 prefetches for large nodes */
- prefetchL2(((char*)ref.ptr)+4*64);
- prefetchL2(((char*)ref.ptr)+5*64);
- prefetchL2(((char*)ref.ptr)+6*64);
- prefetchL2(((char*)ref.ptr)+7*64);
- }
- }
- else
- {
- /* todo: reduce if 32bit offsets are enabled */
- prefetchL2(((char*)ref.ptr)+0*64);
- prefetchL2(((char*)ref.ptr)+1*64);
- prefetchL2(((char*)ref.ptr)+2*64);
- }
-#else
- if (types != BVH_FLAG_QUANTIZED_NODE) {
- prefetchL1(((char*)ref.ptr)+0*64);
- prefetchL1(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
- prefetchL1(((char*)ref.ptr)+2*64);
- prefetchL1(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
- /* deactivate for large nodes on Xeon, as it introduces regressions */
- //prefetchL1(((char*)ref.ptr)+4*64);
- //prefetchL1(((char*)ref.ptr)+5*64);
- //prefetchL1(((char*)ref.ptr)+6*64);
- //prefetchL1(((char*)ref.ptr)+7*64);
- }
- }
- else
- {
- /* todo: reduce if 32bit offsets are enabled */
- prefetchL1(((char*)ref.ptr)+0*64);
- prefetchL1(((char*)ref.ptr)+1*64);
- prefetchL1(((char*)ref.ptr)+2*64);
- }
-#endif
- }
- /*! Same 64 byte stepping as prefetch, but issued through prefetchEX
-  *  and without a special case for quantized nodes: two blocks always,
-  *  two more if N >= 8 or types > BVH_FLAG_ALIGNED_NODE, and four
-  *  further blocks if both conditions hold. */
- __forceinline static void prefetchW(const NodeRef ref, int types=0)
- {
- embree::prefetchEX(((char*)ref.ptr)+0*64);
- embree::prefetchEX(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
- embree::prefetchEX(((char*)ref.ptr)+2*64);
- embree::prefetchEX(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
- embree::prefetchEX(((char*)ref.ptr)+4*64);
- embree::prefetchEX(((char*)ref.ptr)+5*64);
- embree::prefetchEX(((char*)ref.ptr)+6*64);
- embree::prefetchEX(((char*)ref.ptr)+7*64);
- }
- }
-
- /*! bvh type information */
- public:
- const PrimitiveType* primTy; //!< primitive type stored in the BVH
-
- /*! bvh data */
- public:
- Device* device; //!< device pointer
- Scene* scene; //!< scene pointer
- NodeRef root; //!< root node
- FastAllocator alloc; //!< allocator used to allocate nodes
-
- /*! statistics data */
- public:
- size_t numPrimitives; //!< number of primitives the BVH is built over
- size_t numVertices; //!< number of vertices the BVH references
-
- /*! data arrays for special builders */
- public:
- std::vector<BVHN*> objects; //!< BVHs deleted by the destructor of this BVH
- vector_t<char,aligned_allocator<char,32>> subdiv_patches;
- };
-
- /*! shorthands for the two supported branching factors */
- using BVH4 = BVHN<4>;
- using BVH8 = BVHN<8>;
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp
deleted file mode 100644
index 23f4f63d45..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp
+++ /dev/null
@@ -1,1325 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh4_factory.h"
-#include "../bvh/bvh.h"
-
-#include "../geometry/curveNv.h"
-#include "../geometry/curveNi.h"
-#include "../geometry/curveNi_mb.h"
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/subdivpatch1.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-#include "../common/accelinstance.h"
-
-namespace embree
-{
- DECLARE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
- /* VirtualCurveIntersector functions without arguments. NOTE(review): DECLARE_SYMBOL2 and DECLARE_ISA_FUNCTION are defined elsewhere; presumably they declare per-ISA variants and the arguments are (return type, name, parameter list) - confirm. */
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4i,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8i,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4iMB,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void);
- /* Accel::Intersector1 symbols */
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
- /* Accel::Intersector4 symbols */
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
- /* Accel::Intersector8 symbols */
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
- /* Accel::Intersector16 symbols */
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
- /* Accel::IntersectorN symbols (packet fallback and stream intersectors) */
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4IntersectorStreamPacketFallback);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
- /* Builder functions: two-level builders */
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
- /* Builder functions: OBB curve builders */
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- /* Builder functions: scene builders (SAH and fast spatial SAH) */
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
- /* Builder functions: per-mesh refit builders */
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
-
- BVH4Factory::BVH4Factory(int bfeatures, int ifeatures)
- { /* bfeatures is forwarded to selectBuilders(); ifeatures selects the collider symbol and is forwarded to selectIntersectors() */
- SELECT_SYMBOL_DEFAULT_AVX_AVX2(ifeatures,BVH4ColliderUserGeom);
- /* all remaining symbols are chosen by the two helpers below */
- selectBuilders(bfeatures);
- selectIntersectors(ifeatures);
- }
-
- void BVH4Factory::selectBuilders(int features)
- { /* Passes `features` to one SELECT_SYMBOL_* macro per builder symbol; each line is wrapped in the IF_ENABLED_* macro of its geometry category. First group: the BuilderTwoLevel* symbols. */
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4MeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4iMeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4vMeshSAH));
- IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelQuadMeshSAH));
- IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelVirtualSAH));
- IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelInstanceSAH));
- /* curve builders. NOTE(review): the two 8i symbols use SELECT_SYMBOL_INIT_AVX (no DEFAULT in the macro name) - presumably they stay unset without AVX; confirm against the macro definitions. */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4OBBCurve4iMBBuilder_OBB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4Curve8iBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4OBBCurve8iMBBuilder_OBB));
- /* triangle scene builders; only the first three use the macro variant that also names AVX512KNL */
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4SceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4vSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedTriangle4iSceneBuilderSAH));
- /* quad scene builders */
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4vSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4iSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4iMBSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedQuad4iSceneBuilderSAH));
- /* triangle FastSpatialSAH scene builders */
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4SceneBuilderFastSpatialSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vSceneBuilderFastSpatialSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iSceneBuilderFastSpatialSAH));
- /* quad FastSpatialSAH scene builder */
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4vSceneBuilderFastSpatialSAH));
- /* Virtual scene builders, gated by IF_ENABLED_USER */
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4VirtualSceneBuilderSAH));
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4VirtualMBSceneBuilderSAH));
- /* instance scene builders */
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4InstanceSceneBuilderSAH));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH));
- /* grid scene builders */
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH));
- /* subdivision patch builders */
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1BuilderSAH));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1MBBuilderSAH));
- }
-
- /* Selects every intersector symbol of the BVH4 factory.
-  *
-  * `features` is handed unchanged to one SELECT_SYMBOL_* macro per symbol; each
-  * line is wrapped in the IF_ENABLED_* macro of its geometry category.
-  * The virtual-curve leaf intersectors and the single-ray (intersector1)
-  * symbols are always selected. The packet (4/8/16 wide) and stream symbols
-  * are only selected when EMBREE_RAY_PACKETS is defined.
-  *
-  * NOTE(review): the 8- and 16-wide sections use the SELECT_SYMBOL_INIT_*
-  * macro family, whose names carry no DEFAULT fallback - presumably those
-  * symbols stay unset on ISAs not listed; confirm against the macro header.
-  */
- void BVH4Factory::selectIntersectors(int features)
- {
- /* select virtual curve leaf intersectors */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4i));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8i));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4iMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB));
-
- /* select intersectors1 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1MB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1MB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4vIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Moeller));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Triangle4iIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Quad4iIntersector1Pluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector1));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector1));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector1));
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector1));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector1));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector1));
-
- /* every entry is terminated with ';' so the list never depends on how the macros expand */
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Moeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector1Moeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Pluecker));
-
-#if defined (EMBREE_RAY_PACKETS)
-
- /* select intersectors4 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector4));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector4));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector4Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector4Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector4Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector4Chunk));
-
- /* BVH4Quad4vIntersector4HybridMoeller is already selected in the quad group above; it is not repeated here */
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridPluecker));
-
- /* select intersectors8 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector8));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector8));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector8Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector8Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector8Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector8Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridPluecker));
-
- /* select intersectors16 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1Intersector16));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1MBIntersector16));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersector16Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualMBIntersector16Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersector16Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceMBIntersector16Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridMBIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridPluecker));
-
- /* select stream intersectors; the packet fallback is not wrapped in any IF_ENABLED_* macro */
- SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4IntersectorStreamPacketFallback);
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersectorStreamPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersectorStream));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersectorStream));
-
-#endif
- }
-
- /* Assembles the intersector set for the OBB virtual-curve BVH4.
-  * FAST and ROBUST each wire up their own single-ray entry point and, when
-  * EMBREE_RAY_PACKETS is defined, the 4/8/16-wide hybrids; the N-wide slot is
-  * always the stream packet fallback. Any other variant trips the assert and
-  * yields a default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
-   if (ivariant == IntersectVariant::FAST)
-   {
-     Accel::Intersectors fast;
-     fast.ptr = bvh;
-     fast.leafIntersector = leafIntersector;
-     fast.intersector1 = BVH4OBBVirtualCurveIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
-     fast.intersector4 = BVH4OBBVirtualCurveIntersector4Hybrid();
-     fast.intersector8 = BVH4OBBVirtualCurveIntersector8Hybrid();
-     fast.intersector16 = BVH4OBBVirtualCurveIntersector16Hybrid();
-     fast.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-     return fast;
-   }
-
-   if (ivariant == IntersectVariant::ROBUST)
-   {
-     Accel::Intersectors robust;
-     robust.ptr = bvh;
-     robust.leafIntersector = leafIntersector;
-     robust.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1();
-#if defined (EMBREE_RAY_PACKETS)
-     robust.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4Hybrid();
-     robust.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8Hybrid();
-     robust.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16Hybrid();
-     robust.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-     return robust;
-   }
-
-   /* neither FAST nor ROBUST */
-   assert(false);
-   return Accel::Intersectors();
- }
-
- /* MB counterpart of the OBB virtual-curve intersector set: same layout, but
-  * every entry point is the *MB symbol. The N-wide slot is the stream packet
-  * fallback. A variant other than FAST/ROBUST trips the assert and yields a
-  * default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
-   if (ivariant == IntersectVariant::FAST)
-   {
-     Accel::Intersectors fast;
-     fast.ptr = bvh;
-     fast.leafIntersector = leafIntersector;
-     fast.intersector1 = BVH4OBBVirtualCurveIntersector1MB();
-#if defined (EMBREE_RAY_PACKETS)
-     fast.intersector4 = BVH4OBBVirtualCurveIntersector4HybridMB();
-     fast.intersector8 = BVH4OBBVirtualCurveIntersector8HybridMB();
-     fast.intersector16 = BVH4OBBVirtualCurveIntersector16HybridMB();
-     fast.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-     return fast;
-   }
-
-   if (ivariant == IntersectVariant::ROBUST)
-   {
-     Accel::Intersectors robust;
-     robust.ptr = bvh;
-     robust.leafIntersector = leafIntersector;
-     robust.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1MB();
-#if defined (EMBREE_RAY_PACKETS)
-     robust.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4HybridMB();
-     robust.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8HybridMB();
-     robust.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16HybridMB();
-     robust.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-     return robust;
-   }
-
-   /* neither FAST nor ROBUST */
-   assert(false);
-   return Accel::Intersectors();
- }
-
- /* Intersector set for BVH4 over Triangle4. Only the FAST variant is
-  * accepted (asserted). The packet and stream slots come in _filter and
-  * _nofilter pairs, all taken from the Moeller symbols. */
- Accel::Intersectors BVH4Factory::BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   assert(ivariant == IntersectVariant::FAST);
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   result.intersector1 = BVH4Triangle4Intersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-   /* 4-wide */
-   result.intersector4_filter = BVH4Triangle4Intersector4HybridMoeller();
-   result.intersector4_nofilter = BVH4Triangle4Intersector4HybridMoellerNoFilter();
-   /* 8-wide */
-   result.intersector8_filter = BVH4Triangle4Intersector8HybridMoeller();
-   result.intersector8_nofilter = BVH4Triangle4Intersector8HybridMoellerNoFilter();
-   /* 16-wide */
-   result.intersector16_filter = BVH4Triangle4Intersector16HybridMoeller();
-   result.intersector16_nofilter = BVH4Triangle4Intersector16HybridMoellerNoFilter();
-   /* stream */
-   result.intersectorN_filter = BVH4Triangle4IntersectorStreamMoeller();
-   result.intersectorN_nofilter = BVH4Triangle4IntersectorStreamMoellerNoFilter();
-#endif
-   return result;
- }
-
- /* Intersector set for BVH4 over Triangle4v. Only the ROBUST variant is
-  * accepted (asserted); every slot is taken from the Pluecker symbols. */
- Accel::Intersectors BVH4Factory::BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   assert(ivariant == IntersectVariant::ROBUST);
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   result.intersector1 = BVH4Triangle4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-   result.intersector4 = BVH4Triangle4vIntersector4HybridPluecker();
-   result.intersector8 = BVH4Triangle4vIntersector8HybridPluecker();
-   result.intersector16 = BVH4Triangle4vIntersector16HybridPluecker();
-   result.intersectorN = BVH4Triangle4vIntersectorStreamPluecker();
-#endif
-   return result;
- }
-
- /* Intersector set for BVH4 over Triangle4i: FAST uses the Moeller symbols,
-  * ROBUST the Pluecker symbols. Any other variant returns a
-  * default-constructed set (no assert in this function). */
- Accel::Intersectors BVH4Factory::BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Triangle4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4iIntersector4HybridMoeller();
-     result.intersector8 = BVH4Triangle4iIntersector8HybridMoeller();
-     result.intersector16 = BVH4Triangle4iIntersector16HybridMoeller();
-     result.intersectorN = BVH4Triangle4iIntersectorStreamMoeller();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Triangle4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4iIntersector4HybridPluecker();
-     result.intersector8 = BVH4Triangle4iIntersector8HybridPluecker();
-     result.intersector16 = BVH4Triangle4iIntersector16HybridPluecker();
-     result.intersectorN = BVH4Triangle4iIntersectorStreamPluecker();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for BVH4 over Triangle4vMB: FAST uses the Moeller symbols,
-  * ROBUST the Pluecker symbols; the N-wide slot is the stream packet fallback
-  * in both cases. Any other variant returns a default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Triangle4vMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4vMBIntersector4HybridMoeller();
-     result.intersector8 = BVH4Triangle4vMBIntersector8HybridMoeller();
-     result.intersector16 = BVH4Triangle4vMBIntersector16HybridMoeller();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Triangle4vMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4vMBIntersector4HybridPluecker();
-     result.intersector8 = BVH4Triangle4vMBIntersector8HybridPluecker();
-     result.intersector16 = BVH4Triangle4vMBIntersector16HybridPluecker();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for BVH4 over Triangle4iMB: FAST uses the Moeller symbols,
-  * ROBUST the Pluecker symbols; the N-wide slot is the stream packet fallback
-  * in both cases. Any other variant returns a default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Triangle4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4iMBIntersector4HybridMoeller();
-     result.intersector8 = BVH4Triangle4iMBIntersector8HybridMoeller();
-     result.intersector16 = BVH4Triangle4iMBIntersector16HybridMoeller();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Triangle4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Triangle4iMBIntersector4HybridPluecker();
-     result.intersector8 = BVH4Triangle4iMBIntersector8HybridPluecker();
-     result.intersector16 = BVH4Triangle4iMBIntersector16HybridPluecker();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for BVH4 over Quad4v. FAST fills the _filter/_nofilter
-  * packet and stream pairs from the Moeller symbols; ROBUST fills the plain
-  * packet and stream slots from the Pluecker symbols. Any other variant
-  * returns a default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Quad4vIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4_filter = BVH4Quad4vIntersector4HybridMoeller();
-     result.intersector4_nofilter = BVH4Quad4vIntersector4HybridMoellerNoFilter();
-     result.intersector8_filter = BVH4Quad4vIntersector8HybridMoeller();
-     result.intersector8_nofilter = BVH4Quad4vIntersector8HybridMoellerNoFilter();
-     result.intersector16_filter = BVH4Quad4vIntersector16HybridMoeller();
-     result.intersector16_nofilter = BVH4Quad4vIntersector16HybridMoellerNoFilter();
-     result.intersectorN_filter = BVH4Quad4vIntersectorStreamMoeller();
-     result.intersectorN_nofilter = BVH4Quad4vIntersectorStreamMoellerNoFilter();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Quad4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Quad4vIntersector4HybridPluecker();
-     result.intersector8 = BVH4Quad4vIntersector8HybridPluecker();
-     result.intersector16 = BVH4Quad4vIntersector16HybridPluecker();
-     result.intersectorN = BVH4Quad4vIntersectorStreamPluecker();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for BVH4 over Quad4i: FAST uses the Moeller symbols,
-  * ROBUST the Pluecker symbols. Any other variant returns a
-  * default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Quad4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Quad4iIntersector4HybridMoeller();
-     result.intersector8 = BVH4Quad4iIntersector8HybridMoeller();
-     result.intersector16 = BVH4Quad4iIntersector16HybridMoeller();
-     result.intersectorN = BVH4Quad4iIntersectorStreamMoeller();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Quad4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Quad4iIntersector4HybridPluecker();
-     result.intersector8 = BVH4Quad4iIntersector8HybridPluecker();
-     result.intersector16 = BVH4Quad4iIntersector16HybridPluecker();
-     result.intersectorN = BVH4Quad4iIntersectorStreamPluecker();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for BVH4 over Quad4iMB: FAST uses the Moeller symbols,
-  * ROBUST the Pluecker symbols; the N-wide slot is the stream packet fallback
-  * in both cases. Any other variant returns a default-constructed set. */
- Accel::Intersectors BVH4Factory::BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
-   const bool fast = (ivariant == IntersectVariant::FAST);
-   const bool robust = (ivariant == IntersectVariant::ROBUST);
-   if (!fast && !robust)
-     return Accel::Intersectors();
-
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   if (fast)
-   {
-     result.intersector1 = BVH4Quad4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Quad4iMBIntersector4HybridMoeller();
-     result.intersector8 = BVH4Quad4iMBIntersector8HybridMoeller();
-     result.intersector16 = BVH4Quad4iMBIntersector16HybridMoeller();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   else
-   {
-     result.intersector1 = BVH4Quad4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
-     result.intersector4 = BVH4Quad4iMBIntersector4HybridPluecker();
-     result.intersector8 = BVH4Quad4iMBIntersector8HybridPluecker();
-     result.intersector16 = BVH4Quad4iMBIntersector16HybridPluecker();
-     result.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
-   }
-   return result;
- }
-
- /* Intersector set for the QBVH4 Triangle4i symbols: only the single-ray
-  * Pluecker entry point is assigned; no packet or stream slots are set here. */
- Accel::Intersectors BVH4Factory::QBVH4Triangle4iIntersectors(BVH4* bvh)
- {
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   result.intersector1 = QBVH4Triangle4iIntersector1Pluecker();
-   return result;
- }
-
- /* Intersector set for the QBVH4 Quad4i symbols: only the single-ray
-  * Pluecker entry point is assigned; no packet or stream slots are set here. */
- Accel::Intersectors BVH4Factory::QBVH4Quad4iIntersectors(BVH4* bvh)
- {
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   result.intersector1 = QBVH4Quad4iIntersector1Pluecker();
-   return result;
- }
-
- /* Intersector set for BVH4 over user geometry, built from the
-  * BVH4Virtual* symbols. Besides the ray entry points this is the only
-  * factory function in this group that also assigns a collider. */
- Accel::Intersectors BVH4Factory::BVH4UserGeometryIntersectors(BVH4* bvh)
- {
-   Accel::Intersectors result;
-   result.ptr = bvh;
-   result.intersector1 = BVH4VirtualIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
-   result.intersector4 = BVH4VirtualIntersector4Chunk();
-   result.intersector8 = BVH4VirtualIntersector8Chunk();
-   result.intersector16 = BVH4VirtualIntersector16Chunk();
-   result.intersectorN = BVH4VirtualIntersectorStream();
-#endif
-   /* assigned regardless of EMBREE_RAY_PACKETS */
-   result.collider = BVH4ColliderUserGeom();
-   return result;
- }
-
- Accel::Intersectors BVH4Factory::BVH4UserGeometryMBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4VirtualMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4VirtualMBIntersector4Chunk();
- intersectors.intersector8 = BVH4VirtualMBIntersector8Chunk();
- intersectors.intersector16 = BVH4VirtualMBIntersector16Chunk();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4InstanceIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4InstanceIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4InstanceIntersector4Chunk();
- intersectors.intersector8 = BVH4InstanceIntersector8Chunk();
- intersectors.intersector16 = BVH4InstanceIntersector16Chunk();
- intersectors.intersectorN = BVH4InstanceIntersectorStream();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4InstanceMBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4InstanceMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4InstanceMBIntersector4Chunk();
- intersectors.intersector8 = BVH4InstanceMBIntersector8Chunk();
- intersectors.intersector16 = BVH4InstanceMBIntersector16Chunk();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4SubdivPatch1Intersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4SubdivPatch1Intersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4SubdivPatch1Intersector4();
- intersectors.intersector8 = BVH4SubdivPatch1Intersector8();
- intersectors.intersector16 = BVH4SubdivPatch1Intersector16();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4SubdivPatch1MBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4SubdivPatch1MBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4SubdivPatch1MBIntersector4();
- intersectors.intersector8 = BVH4SubdivPatch1MBIntersector8();
- intersectors.intersector16 = BVH4SubdivPatch1MBIntersector16();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4i::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4i(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
-#if defined(EMBREE_TARGET_SIMD8)
- Accel* BVH4Factory::BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve8i::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8i(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-#endif
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4v::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4v(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4iMB::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector4iMB(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
-#if defined(EMBREE_TARGET_SIMD8)
- Accel* BVH4Factory::BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve8iMB::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(), ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-#endif
-
- Accel* BVH4Factory::BVH4Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4Intersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4Intersectors(accel,IntersectVariant::FAST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4v::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4vIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4iIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4i>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default" ) {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4iMBIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4iMB>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4vMB::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4vMBIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4vMB>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4vMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4v::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4vIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->quad_builder == "sah" ) builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0);
- else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->quad_builder == "dynamic" ) builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else if (scene->device->quad_builder == "sah") builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->quad_builder_mb == "sah") builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH4<Quad4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4QuantizedQuad4i(Scene* scene)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Builder* builder = BVH4QuantizedQuad4iSceneBuilderSAH(accel,scene,0);
- Accel::Intersectors intersectors = QBVH4Quad4iIntersectors(accel);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4QuantizedTriangle4i(Scene* scene)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
- Builder* builder = BVH4QuantizedTriangle4iSceneBuilderSAH(accel,scene,0);
- Accel::Intersectors intersectors = QBVH4Triangle4iIntersectors(accel);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4SubdivPatch1(Scene* scene)
- {
- BVH4* accel = new BVH4(SubdivPatch1::type,scene);
- Accel::Intersectors intersectors = BVH4SubdivPatch1Intersectors(accel);
- Builder* builder = BVH4SubdivPatch1BuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4SubdivPatch1MB(Scene* scene)
- {
- BVH4* accel = new BVH4(SubdivPatch1::type,scene);
- Accel::Intersectors intersectors = BVH4SubdivPatch1MBIntersectors(accel);
- Builder* builder = BVH4SubdivPatch1MBBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4UserGeometry(Scene* scene, BuildVariant bvariant)
- {
- BVH4* accel = new BVH4(Object::type,scene);
- Accel::Intersectors intersectors = BVH4UserGeometryIntersectors(accel);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4VirtualSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH4VirtualSceneBuilderSAH(accel,scene,0);
- else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4UserGeometryMB(Scene* scene)
- {
- BVH4* accel = new BVH4(Object::type,scene);
- Accel::Intersectors intersectors = BVH4UserGeometryMBIntersectors(accel);
- Builder* builder = BVH4VirtualMBSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant)
- {
- BVH4* accel = new BVH4(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH4InstanceIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP;
- // Builder* builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype);
- else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4InstanceMB(Scene* scene, bool isExpensive)
- {
- BVH4* accel = new BVH4(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH4InstanceMBIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP;
- Builder* builder = BVH4InstanceMBSceneBuilderSAH(accel,scene,gtype);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel::Intersectors BVH4Factory::BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- if (ivariant == IntersectVariant::FAST)
- {
- intersectors.intersector1 = BVH4GridIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4GridIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4GridIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- }
- else /* if (ivariant == IntersectVariant::ROBUST) */
- {
- intersectors.intersector1 = BVH4GridIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4GridIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4GridIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- }
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4GridMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4GridMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4GridMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH4Factory::BVH4Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(SubGridQBVH4::type,scene);
- Accel::Intersectors intersectors = BVH4GridIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- builder = BVH4GridSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4<GridMesh>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(SubGridQBVH4::type,scene);
- Accel::Intersectors intersectors = BVH4GridMBIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- builder = BVH4GridMBSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4MB<GridMesh>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h
deleted file mode 100644
index a68227b41f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h
+++ /dev/null
@@ -1,316 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_factory.h"
-
-namespace embree
-{
- /*! BVH4 instantiations */
- class BVH4Factory : public BVHFactory
- {
- public:
- BVH4Factory(int bfeatures, int ifeatures);
-
- public:
- Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
-
- Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
- Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH4QuantizedTriangle4i(Scene* scene);
- Accel* BVH4QuantizedQuad4i(Scene* scene);
-
- Accel* BVH4SubdivPatch1(Scene* scene);
- Accel* BVH4SubdivPatch1MB(Scene* scene);
-
- Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH4UserGeometryMB(Scene* scene);
-
- Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
-
- Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- private:
- void selectBuilders(int features);
- void selectIntersectors(int features);
-
- private:
- Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
- Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
-
- Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
- Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
- Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
- Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
- Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- private:
-
- DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4IntersectorStreamPacketFallback);
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
-
- // SAH scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- // spatial scene builder
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- // twolevel scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp
deleted file mode 100644
index 9fe057c392..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp
+++ /dev/null
@@ -1,1165 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "../common/isa.h" // to define EMBREE_TARGET_SIMD8
-
-#if defined (EMBREE_TARGET_SIMD8)
-
-#include "bvh8_factory.h"
-#include "../bvh/bvh.h"
-
-#include "../geometry/curveNv.h"
-#include "../geometry/curveNi.h"
-#include "../geometry/curveNi_mb.h"
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/subdivpatch1.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-#include "../common/accelinstance.h"
-
-namespace embree
-{
- DECLARE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
-
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
-
- BVH8Factory::BVH8Factory(int bfeatures, int ifeatures)
- {
- SELECT_SYMBOL_INIT_AVX(ifeatures,BVH8ColliderUserGeom);
-
- selectBuilders(bfeatures);
- selectIntersectors(ifeatures);
- }
-
- void BVH8Factory::selectBuilders(int features)
- {
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8Curve8vBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8OBBCurve8iMBBuilder_OBB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4SceneBuilderSAH));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iMBSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedQuad4iSceneBuilderSAH));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualSceneBuilderSAH));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualMBSceneBuilderSAH));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceSceneBuilderSAH));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceMBSceneBuilderSAH));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderFastSpatialSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderFastSpatialSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderFastSpatialSAH));
-
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4MeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4vMeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4iMeshSAH));
- IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelQuadMeshSAH));
- IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelVirtualSAH));
- IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelInstanceSAH));
- }
-
- void BVH8Factory::selectIntersectors(int features)
- {
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB));
-
- /* select intersectors1 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1MB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1MB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Woop));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4iIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4Intersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Quad4iIntersector1Pluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector1));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector1));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector1));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector1));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Moeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridMBIntersector1Moeller))
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Pluecker));
-
-#if defined (EMBREE_RAY_PACKETS)
-
- /* select intersectors4 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector4Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector4Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector4Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector4Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridPluecker));
-
- /* select intersectors8 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector8Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector8Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector8Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector8Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridPluecker));
-
- /* select intersectors16 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector16Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector16Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector16Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector16Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridPluecker));
-
- /* select stream intersectors */
-
- SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8IntersectorStreamPacketFallback);
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersectorStreamPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersectorStream));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersectorStream));
-
-#endif
- }
-
- Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4Hybrid();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8Hybrid();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16Hybrid();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4Hybrid();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8Hybrid();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16Hybrid();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- default: assert(false);
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1MB();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4HybridMB();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8HybridMB();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16HybridMB();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1MB();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4HybridMB();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8HybridMB();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16HybridMB();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- default: assert(false);
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- assert(ivariant == IntersectVariant::FAST);
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4Intersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH8Triangle4Intersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH8Triangle4Intersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH8Triangle4Intersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH8Triangle4Intersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH8Triangle4Intersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH8Triangle4Intersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH8Triangle4IntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH8Triangle4IntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
-#define ENABLE_WOOP_TEST 0
-#if ENABLE_WOOP_TEST == 0
- //assert(ivariant == IntersectVariant::ROBUST);
- intersectors.intersector1 = BVH8Triangle4vIntersector1Pluecker();
-#else
- intersectors.intersector1 = BVH8Triangle4vIntersector1Woop();
-#endif
-
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Triangle4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4iIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8Triangle4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4iIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Triangle4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4vMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4vMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4vIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH8Quad4vIntersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH8Quad4vIntersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH8Quad4vIntersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH8Quad4vIntersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH8Quad4vIntersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH8Quad4vIntersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH8Quad4vIntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH8Quad4vIntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Quad4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Quad4iIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Quad4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8Quad4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4iIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Quad4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Triangle4iIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Triangle4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Triangle4Intersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Triangle4Intersector1Moeller();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Quad4iIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Quad4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8UserGeometryIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8VirtualIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8VirtualIntersector4Chunk();
- intersectors.intersector8 = BVH8VirtualIntersector8Chunk();
- intersectors.intersector16 = BVH8VirtualIntersector16Chunk();
- intersectors.intersectorN = BVH8VirtualIntersectorStream();
-#endif
- intersectors.collider = BVH8ColliderUserGeom();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8UserGeometryMBIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8VirtualMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8VirtualMBIntersector4Chunk();
- intersectors.intersector8 = BVH8VirtualMBIntersector8Chunk();
- intersectors.intersector16 = BVH8VirtualMBIntersector16Chunk();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8InstanceIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8InstanceIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8InstanceIntersector4Chunk();
- intersectors.intersector8 = BVH8InstanceIntersector8Chunk();
- intersectors.intersector16 = BVH8InstanceIntersector16Chunk();
- intersectors.intersectorN = BVH8InstanceIntersectorStream();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8InstanceMBIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8InstanceMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8InstanceMBIntersector4Chunk();
- intersectors.intersector8 = BVH8InstanceMBIntersector8Chunk();
- intersectors.intersector16 = BVH8InstanceMBIntersector16Chunk();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH8Factory::BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Curve8v::type,scene);
- Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8v(),ivariant);
- Builder* builder = BVH8Curve8vBuilder_OBB_New(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Curve8iMB::type,scene);
- Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(),ivariant);
- Builder* builder = BVH8OBBCurve8iMBBuilder_OBB(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4Intersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH8Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4v::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4vIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4v>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = BVH8Triangle4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = BVH8Triangle4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") { // FIXME: implement
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4vMB::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4vMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4vMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedTriangle4i(Scene* scene)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = QBVH8Triangle4iIntersectors(accel);
- Builder* builder = BVH8QuantizedTriangle4iSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedTriangle4(Scene* scene)
- {
- BVH8* accel = new BVH8(Triangle4::type,scene);
- Accel::Intersectors intersectors = QBVH8Triangle4Intersectors(accel);
- Builder* builder = BVH8QuantizedTriangle4SceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4v::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4vIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->quad_builder == "dynamic" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false);
- else if (scene->device->quad_builder == "morton" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,true);
- else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH8<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedQuad4i(Scene* scene)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = QBVH8Quad4iIntersectors(accel);
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default" ) builder = BVH8QuantizedQuad4iSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for QBVH8<Quad4i>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8UserGeometry(Scene* scene, BuildVariant bvariant)
- {
- BVH8* accel = new BVH8(Object::type,scene);
- Accel::Intersectors intersectors = BVH8UserGeometryIntersectors(accel);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8VirtualSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH8VirtualSceneBuilderSAH(accel,scene,0);
- else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8UserGeometryMB(Scene* scene)
- {
- BVH8* accel = new BVH8(Object::type,scene);
- Accel::Intersectors intersectors = BVH8UserGeometryMBIntersectors(accel);
- Builder* builder = BVH8VirtualMBSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant)
- {
- BVH8* accel = new BVH8(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH8InstanceIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE;
- // Builder* builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);; break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);
- else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8InstanceMB(Scene* scene, bool isExpensive)
- {
- BVH8* accel = new BVH8(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH8InstanceMBIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE;
- Builder* builder = BVH8InstanceMBSceneBuilderSAH(accel,scene,gtype);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel::Intersectors BVH8Factory::BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- if (ivariant == IntersectVariant::FAST)
- {
- intersectors.intersector1 = BVH8GridIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8GridIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8GridIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8GridIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- }
- else /* if (ivariant == IntersectVariant::ROBUST) */
- {
- intersectors.intersector1 = BVH8GridIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8GridIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8GridIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8GridIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- }
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8GridMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = nullptr;
- intersectors.intersector8 = nullptr;
- intersectors.intersector16 = nullptr;
- intersectors.intersectorN = nullptr;
-#endif
- return intersectors;
- }
-
- Accel* BVH8Factory::BVH8Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(SubGridQBVH8::type,scene);
- Accel::Intersectors intersectors = BVH8GridIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->grid_builder == "default") {
- builder = BVH8GridSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<GridMesh>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(SubGridQBVH8::type,scene);
- Accel::Intersectors intersectors = BVH8GridMBIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->grid_builder_mb == "default") {
- builder = BVH8GridMBSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8MB<GridMesh>");
- return new AccelInstance(accel,builder,intersectors);
- }
-}
-
-#endif
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h
deleted file mode 100644
index b92188e7d3..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_factory.h"
-
-namespace embree
-{
- /*! BVH8 instantiations */
- class BVH8Factory : public BVHFactory
- {
- public:
- BVH8Factory(int bfeatures, int ifeatures);
-
- public:
- Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
- Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
-
- Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH8QuantizedTriangle4i(Scene* scene);
- Accel* BVH8QuantizedTriangle4(Scene* scene);
- Accel* BVH8QuantizedQuad4i(Scene* scene);
-
- Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH8UserGeometryMB(Scene* scene);
-
- Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
-
- Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- private:
- void selectBuilders(int features);
- void selectIntersectors(int features);
-
- private:
- Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
- Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
-
- Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
- Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
- Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
- Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
- Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- private:
- DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
-
- // SAH scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- // SAH spatial scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- // twolevel scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp
deleted file mode 100644
index e832537ec5..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_builder.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRef>
- (FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
-
- template<int N>
- typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRef>
- (FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
- template<int N>
- typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRecordMB {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRecordMB>
- (FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
- template struct BVHNBuilderVirtual<4>;
- template struct BVHNBuilderQuantizedVirtual<4>;
- template struct BVHNBuilderMblurVirtual<4>;
-
-#if defined(__AVX__)
- template struct BVHNBuilderVirtual<8>;
- template struct BVHNBuilderQuantizedVirtual<8>;
- template struct BVHNBuilderMblurVirtual<8>;
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h
deleted file mode 100644
index 1b86bb45ad..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "../builders/bvh_builder_sah.h"
-
-namespace embree
-{
- namespace isa
- {
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N>
- struct BVHNBuilderVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
- virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
- }
- };
-
- template<int N>
- struct BVHNBuilderQuantizedVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
- virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
- }
- };
-
- template<int N>
- struct BVHNBuilderMblurVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
- virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp
deleted file mode 100644
index 64759c1294..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp
+++ /dev/null
@@ -1,531 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_statistics.h"
-#include "bvh_rotate.h"
-#include "../common/profile.h"
-#include "../../common/algorithms/parallel_prefix_sum.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/bvh_builder_morton.h"
-
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-#if defined(__X86_64__) || defined(__aarch64__)
-# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
-#else
-# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
-#endif
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- struct SetBVHNBounds
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
- typedef typename BVH::AABBNode AABBNode;
-
- BVH* bvh;
- __forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
- {
- AABBNode* node = ref.getAABBNode();
-
- BBox3fa res = empty;
- for (size_t i=0; i<num; i++) {
- const BBox3fa b = children[i].bounds;
- res.extend(b);
- node->setRef(i,children[i].ref);
- node->setBounds(i,b);
- }
-
- BBox3fx result = (BBox3fx&)res;
-#if ROTATE_TREE
- if (N == 4)
- {
- size_t n = 0;
- for (size_t i=0; i<num; i++)
- n += children[i].bounds.lower.a;
-
- if (n >= 4096) {
- for (size_t i=0; i<num; i++) {
- if (children[i].bounds.lower.a < 4096) {
- for (int j=0; j<ROTATE_TREE; j++)
- BVHNRotate<N>::rotate(node->child(i));
- node->child(i).setBarrier();
- }
- }
- }
- result.lower.a = unsigned(n);
- }
-#endif
-
- return NodeRecord(ref,result);
- }
- };
-
- template<int N, typename Primitive>
- struct CreateMortonLeaf;
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
- const TriangleMesh* __restrict__ const mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
-
- Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = unsigned(current.size());
-#endif
- return NodeRecord(ref,box_o);
- }
-
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4v>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
- const TriangleMesh* __restrict__ mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4i>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
-
- vuint4 v0 = zero, v1 = zero, v2 = zero;
- vuint4 vgeomID = -1, vprimID = -1;
- const TriangleMesh* __restrict__ const mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID[i] = geomID_;
- vprimID[i] = primID;
- unsigned int int_stride = mesh->vertices0.getStride()/4;
- v0[i] = tri.v[0] * int_stride;
- v1[i] = tri.v[1] * int_stride;
- v2[i] = tri.v[2] * int_stride;
- }
-
- for (size_t i=items; i<4; i++)
- {
- vgeomID[i] = vgeomID[0];
- vprimID[i] = -1;
- v0[i] = 0;
- v1[i] = 0;
- v2[i] = 0;
- }
- Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Quad4v>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
-
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
- const QuadMesh* __restrict__ mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const QuadMesh::Quad& tri = mesh->quad(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- const Vec3fa& p3 = mesh->vertex(tri.v[3]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
- }
- Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- QuadMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Object>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
-
- /* allocate leaf node */
- Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,items);
- const UserGeometry* mesh = this->mesh;
-
- BBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- {
- const unsigned int index = morton[start+i].index;
- const unsigned int primID = index;
- bounds.extend(mesh->bounds(primID));
- new (&accel[i]) Object(geomID_,primID);
- }
-
- BBox3fx box_o = (BBox3fx&)bounds;
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- UserGeometry* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,InstancePrimitive>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items <= 1);
-
- /* allocate leaf node */
- InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,items);
- const Instance* instance = this->mesh;
-
- BBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- bounds.extend(instance->bounds(primID));
- new (&accel[i]) InstancePrimitive(instance, geomID_);
- }
-
- BBox3fx box_o = (BBox3fx&)bounds;
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- Instance* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<typename Mesh>
- struct CalculateMeshBounds
- {
- __forceinline CalculateMeshBounds (Mesh* mesh)
- : mesh(mesh) {}
-
- __forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
- return mesh->bounds(morton.index);
- }
-
- private:
- Mesh* mesh;
- };
-
- template<int N, typename Mesh, typename Primitive>
- class BVHNMeshBuilderMorton : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- public:
-
- BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
- : bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}
-
- /* build function */
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- morton.clear();
- }
- size_t numPrimitives = mesh->size();
- numPreviousPrimitives = numPrimitives;
-
- /* skip build for empty scene */
- if (numPrimitives == 0) {
- bvh->set(BVH::emptyNode,empty,0);
- return;
- }
-
- /* preallocate arrays */
- morton.resize(numPrimitives);
- size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
- bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
- bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);
-
- /* create morton code array */
- BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
- size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);
-
- /* create BVH */
- SetBVHNBounds<N> setBounds(bvh);
- CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
- CalculateMeshBounds<Mesh> calculateBounds(mesh);
- auto root = BVHBuilderMorton::build<NodeRecord>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create(),
- setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
- morton.data(),dest,numPrimitivesGen,settings);
-
- bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);
-
-#if ROTATE_TREE
- if (N == 4)
- {
- for (int i=0; i<ROTATE_TREE; i++)
- BVHNRotate<N>::rotate(bvh->root);
- bvh->clearBarrier(bvh->root);
- }
-#endif
-
- /* clear temporary data for static geometry */
- if (bvh->scene->isStaticAccel()) {
- morton.clear();
- }
- bvh->cleanup();
- }
-
- void clear() {
- morton.clear();
- }
-
- private:
- BVH* bvh;
- Mesh* mesh;
- mvector<BVHBuilderMorton::BuildPrim> morton;
- BVHBuilderMorton::Settings settings;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
- };
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
- Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
- Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
- Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
- Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
-#if defined(__AVX__)
- Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
-#if defined(__AVX__)
- Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
-#if defined(__AVX__)
- Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
-#endif
-#endif
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp
deleted file mode 100644
index cf5b2eb47f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp
+++ /dev/null
@@ -1,640 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-#include "../builders/primrefgen.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-#define PROFILE 0
-#define PROFILE_RUNS 20
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Primitive>
- struct CreateLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t n = set.size();
- size_t items = Primitive::blocks(n);
- size_t start = set.begin();
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return node;
- }
-
- BVH* bvh;
- };
-
-
- template<int N, typename Primitive>
- struct CreateLeafQuantized
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t n = set.size();
- size_t items = Primitive::blocks(n);
- size_t start = set.begin();
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return node;
- }
-
- BVH* bvh;
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N, typename Primitive>
- struct BVHNBuilderSAH : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- Geometry* mesh;
- mvector<PrimRef> prims;
- GeneralBVHBuilder::Settings settings;
- Geometry::GTypeMask gtype_;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
- bool primrefarrayalloc;
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
- const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
- settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}
-
- BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}
-
- // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* if we use the primrefarray for allocations we have to take it back from the BVH */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.unshare(prims);
-
- /* skip build for empty scene */
- const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
- numPreviousPrimitives = numPrimitives;
- if (numPrimitives == 0) {
- bvh->clear();
- prims.clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
-
- /* create primref array */
- if (primrefarrayalloc) {
- settings.primrefarrayalloc = numPrimitives/1000;
- if (settings.primrefarrayalloc < 1000)
- settings.primrefarrayalloc = inf;
- }
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* initialize allocator */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
- prims.resize(numPrimitives);
-
- PrimInfo pinfo = mesh ?
- createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) :
- createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface);
-
- /* pinfo might has zero size due to invalid geometry */
- if (unlikely(pinfo.size() == 0))
- {
- bvh->clear();
- prims.clear();
- return;
- }
-
- /* call BVH builder */
- NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
-#if PROFILE
- });
-#endif
-
- /* if we allocated using the primrefarray we have to keep it alive */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.share(prims);
-
- /* for static geometries we can do some cleanups */
- else if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N, typename Primitive>
- struct BVHNBuilderSAHQuantized : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- Geometry* mesh;
- mvector<PrimRef> prims;
- GeneralBVHBuilder::Settings settings;
- Geometry::GTypeMask gtype_;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}
-
- BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}
-
- // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* skip build for empty scene */
- const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
- numPreviousPrimitives = numPrimitives;
- if (numPrimitives == 0) {
- prims.clear();
- bvh->clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
- /* create primref array */
- prims.resize(numPrimitives);
- PrimInfo pinfo = mesh ?
- createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) :
- createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface);
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* call BVH builder */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
- NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- //bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
-#if PROFILE
- });
-#endif
-
- /* clear temporary data for static geometry */
- if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-
- template<int N, typename Primitive>
- struct CreateLeafGrid
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- const size_t items = set.size(); //Primitive::blocks(n);
- const size_t start = set.begin();
-
- /* collect all subsets with unique geomIDs */
- assert(items <= N);
- unsigned int geomIDs[N];
- unsigned int num_geomIDs = 1;
- geomIDs[0] = prims[start].geomID();
-
- for (size_t i=1;i<items;i++)
- {
- bool found = false;
- const unsigned int new_geomID = prims[start+i].geomID();
- for (size_t j=0;j<num_geomIDs;j++)
- if (new_geomID == geomIDs[j])
- { found = true; break; }
- if (!found)
- geomIDs[num_geomIDs++] = new_geomID;
- }
-
- /* allocate all leaf memory in one single block */
- SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,num_geomIDs);
-
- for (size_t g=0;g<num_geomIDs;g++)
- {
- unsigned int x[N];
- unsigned int y[N];
- unsigned int primID[N];
- BBox3fa bounds[N];
- unsigned int pos = 0;
- for (size_t i=0;i<items;i++)
- {
- if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
-
- const SubGridBuildData& sgrid_bd = sgrids[prims[start+i].primID()];
- x[pos] = sgrid_bd.sx;
- y[pos] = sgrid_bd.sy;
- primID[pos] = sgrid_bd.primID;
- bounds[pos] = prims[start+i].bounds();
- pos++;
- }
- assert(pos <= N);
- new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,geomIDs[g],pos);
- }
-
- return node;
- }
-
- BVH* bvh;
- const SubGridBuildData * const sgrids;
- };
-
-
- template<int N>
- struct BVHNBuilderSAHGrid : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- GridMesh* mesh;
- mvector<PrimRef> prims;
- mvector<SubGridBuildData> sgrids;
- GeneralBVHBuilder::Settings settings;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}
-
- BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* if we use the primrefarray for allocations we have to take it back from the BVH */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.unshare(prims);
-
- const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
- numPreviousPrimitives = numGridPrimitives;
-
- PrimInfo pinfo(empty);
- size_t numPrimitives = 0;
-
- if (!mesh)
- {
- /* first run to get #primitives */
-
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator<GridMesh,false> iter(scene);
-
- pstate.init(iter,size_t(1024));
-
- /* iterate over all meshes in the scene */
- pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- BBox3fa bounds = empty;
- const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j);
- if (!mesh->valid(j)) continue;
- pinfo.add_center2(prim,mesh->getNumSubGrids(j));
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- numPrimitives = pinfo.size();
-
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
-
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- k = base.size();
- size_t p_index = k;
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- const GridMesh::Grid &g = mesh->grid(j);
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
- const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index);
- pinfo.add_center2(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- assert(pinfo.size() == numPrimitives);
- }
- else
- {
- ParallelPrefixSumState<PrimInfo> pstate;
- /* iterate over all grids in a single mesh */
- pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- BBox3fa bounds = empty;
- const PrimRef prim(bounds,geomID_,unsigned(j));
- pinfo.add_center2(prim,mesh->getNumSubGrids(j));
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- numPrimitives = pinfo.size();
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
-
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
- {
-
- size_t p_index = base.size();
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- const GridMesh::Grid &g = mesh->grid(j);
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
- const PrimRef prim(bounds,geomID_,unsigned(p_index));
- pinfo.add_center2(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- }
-
- /* no primitives */
- if (numPrimitives == 0) {
- bvh->clear();
- prims.clear();
- sgrids.clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
-
- /* create primref array */
- settings.primrefarrayalloc = numPrimitives/1000;
- if (settings.primrefarrayalloc < 1000)
- settings.primrefarrayalloc = inf;
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* initialize allocator */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
-
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
-
- /* pinfo might has zero size due to invalid geometry */
- if (unlikely(pinfo.size() == 0))
- {
- bvh->clear();
- sgrids.clear();
- prims.clear();
- return;
- }
-
- /* call BVH builder */
- NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
- /* clear temporary array */
- sgrids.clear();
-
- /* if we allocated using the primrefarray we have to keep it alive */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.share(prims);
-
- /* for static geometries we can do some cleanups */
- else if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
- Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
-
-
- Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
- Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
- Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
- Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
-
-#if defined(__AVX__)
- Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
- Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
-
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
-
- Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_max_leaf_size;
- return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
- }
-
- Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
- }
-#if defined(__AVX__)
-
- Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_max_leaf_size;
- return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
- }
-
- Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
- }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
- Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
- }
-#if defined(__AVX__)
- Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
- Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,gtype);
- }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_GRID)
- Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); }
- Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct
-
-#if defined(__AVX__)
- Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); }
- Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct
-#endif
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp
deleted file mode 100644
index 9c01553ec6..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp
+++ /dev/null
@@ -1,705 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-#include "../builders/bvh_builder_msmblur.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-
-// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-
-namespace embree
-{
- namespace isa
- {
-
-#if 0
- template<int N, typename Primitive>
- struct CreateMBlurLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
-
- __forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}
-
- __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t items = Primitive::blocks(set.size());
- size_t start = set.begin();
- for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- NodeRef node = bvh->encodeLeaf((char*)accel,items);
-
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<items; i++)
- allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));
-
- return NodeRecordMB(node,allBounds);
- }
-
- BVH* bvh;
- PrimRef* prims;
- size_t time;
- };
-#endif
-
- template<int N, typename Mesh, typename Primitive>
- struct CreateMSMBlurLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
-
- __forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}
-
- __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t items = Primitive::blocks(current.prims.size());
- size_t start = current.prims.begin();
- size_t end = current.prims.end();
- for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteNodeAlignment);
- NodeRef node = bvh->encodeLeaf((char*)accel,items);
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<items; i++)
- allBounds.extend(accel[i].fillMB(current.prims.prims->data(), start, current.prims.end(), bvh->scene, current.prims.time_range));
- return NodeRecordMB4D(node,allBounds,current.prims.time_range);
- }
-
- BVH* bvh;
- };
-
- /* Motion blur BVH with 4D nodes and internal time splits */
- template<int N, typename Mesh, typename Primitive>
- struct BVHNBuilderMBlurSAH : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
- typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
- typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
-
- BVH* bvh;
- Scene* scene;
- const size_t sahBlockSize;
- const float intCost;
- const size_t minLeafSize;
- const size_t maxLeafSize;
- const Geometry::GTypeMask gtype_;
-
- BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}
-
- void build()
- {
- /* skip build for empty scene */
- const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
- if (numPrimitives == 0) { bvh->clear(); return; }
-
- double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
-
- //const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
- //const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
-
- /*if (numTimeSegments == 1)
- buildSingleSegment(numPrimitives);
- else*/
- buildMultiSegment(numPrimitives);
-
-#if PROFILE
- });
-#endif
-
- /* clear temporary data for static geometry */
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
-#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf.
- void buildSingleSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRef> prims(scene->device,numPrimitives);
- const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,prims,bvh->scene->progressInterface,0);
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
- (typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
- CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
- prims.data(),pinfo,settings);
-
- bvh->set(root.ref,root.lbounds,pinfo.size());
- }
-#endif
-
- void buildMultiSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRefMB> prims(scene->device,numPrimitives);
- PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,prims,bvh->scene->progressInterface);
-
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
-
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- BVHBuilderMSMBlur::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxDepth;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root =
- BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
- RecalculatePrimRef<Mesh>(scene),
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNodeMB4D::Create(),
- typename BVH::AABBNodeMB4D::Set(),
- CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
- bvh->scene->progressInterface,
- settings);
-
- bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
- }
-
- void clear() {
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- struct GridRecalculatePrimRef
- {
- Scene* scene;
- const SubGridBuildData * const sgrids;
-
- __forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
- : scene(scene), sgrids(sgrids) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned int geomID = prim.geomID();
- const GridMesh* mesh = scene->get<GridMesh>(geomID);
- const unsigned int buildID = prim.primID();
- const SubGridBuildData &subgrid = sgrids[buildID];
- const unsigned int primID = subgrid.primID;
- const size_t x = subgrid.x();
- const size_t y = subgrid.y();
- const LBBox3fa lbounds = mesh->linearBounds(mesh->grid(primID),x,y,time_range);
- const unsigned num_time_segments = mesh->numTimeSegments();
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- const unsigned int geomID = prim.geomID();
- const GridMesh* mesh = scene->get<GridMesh>(geomID);
- const unsigned int buildID = prim.primID();
- const SubGridBuildData &subgrid = sgrids[buildID];
- const unsigned int primID = subgrid.primID;
- const size_t x = subgrid.x();
- const size_t y = subgrid.y();
- return mesh->linearBounds(mesh->grid(primID),x,y,time_range);
- }
-
- };
-
-    /*! Leaf creation functor of the multi-segment motion blur grid builder.
-     *  The primitives of a build record are grouped by geomID and one
-     *  SubGridMBQBVHN<N> block is written per distinct geometry. */
-    template<int N>
-    struct CreateMSMBlurLeafGrid
-    {
-      typedef BVHN<N> BVH;
-      typedef typename BVH::NodeRef NodeRef;
-      typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
-
-      __forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
-        : scene(scene), bvh(bvh), sgrids(sgrids) {}
-
-      /*! creates the leaf for 'current' and returns its node reference together
-       *  with the merged linear bounds and the time range of the record */
-      __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
-      {
-        const size_t first = current.prims.begin();
-        const size_t count = current.prims.size();
-        const PrimRefMB* refs = current.prims.prims->data();
-
-        /* gather the distinct geomIDs in order of first occurrence */
-        assert(count <= N);
-        unsigned int geomIDs[N];
-        unsigned int num_geomIDs = 0;
-        geomIDs[num_geomIDs++] = refs[first].geomID();
-
-        for (size_t i=1; i<count; i++)
-        {
-          const unsigned int candidate = refs[first+i].geomID();
-          size_t j = 0;
-          while (j < num_geomIDs && geomIDs[j] != candidate) j++;
-          if (j == num_geomIDs)
-            geomIDs[num_geomIDs++] = candidate;
-        }
-
-        /* all leaf blocks live in a single allocation */
-        SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
-        typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
-
-        LBBox3fa allBounds = empty;
-
-        for (size_t g=0; g<num_geomIDs; g++)
-        {
-          const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
-
-          /* per-subgrid data of the primitives that belong to geometry g */
-          unsigned int packed_x[N];
-          unsigned int packed_y[N];
-          unsigned int primID[N];
-          BBox3fa bounds0[N];
-          BBox3fa bounds1[N];
-          unsigned int pos = 0;
-
-          for (size_t i=0; i<count; i++)
-          {
-            const PrimRefMB& ref = refs[first+i];
-            if (unlikely(ref.geomID() != geomIDs[g])) continue;
-
-            const SubGridBuildData &sgrid_bd = sgrids[ref.primID()];
-            packed_x[pos] = sgrid_bd.sx;
-            packed_y[pos] = sgrid_bd.sy;
-            primID[pos] = sgrid_bd.primID;
-
-            /* bounds are computed from the coordinates returned by x()/y(), not from the raw sx/sy fields */
-            const size_t sub_x = sgrid_bd.x();
-            const size_t sub_y = sgrid_bd.y();
-            LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),sub_x,sub_y,current.prims.time_range);
-            allBounds.extend(newBounds);
-            bounds0[pos] = newBounds.bounds0;
-            bounds1[pos] = newBounds.bounds1;
-            pos++;
-          }
-          assert(pos <= N);
-          new (&accel[g]) SubGridMBQBVHN<N>(packed_x,packed_y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos);
-        }
-        return NodeRecordMB4D(node,allBounds,current.prims.time_range);
-      }
-
-      Scene *scene;
-      BVH* bvh;
-      const SubGridBuildData * const sgrids;
-    };
-
- /* NOTE: CreateLeafGridMB below is compiled out by the '#if 0' guard.
-    It creates SubGridMBQBVHN<N> leaves from plain PrimRefs: primitives are
-    grouped by geomID, the subgrid bounds are built for time steps 0 and 1,
-    and 0.0f / 1.0f are passed as the time arguments of the leaf constructor. */
-#if 0
- template<int N>
- struct CreateLeafGridMB
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
-
- __forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
- : scene(scene), bvh(bvh), sgrids(sgrids) {}
-
- __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- const size_t items = set.size();
- const size_t start = set.begin();
-
- /* collect all subsets with unique geomIDs */
- assert(items <= N);
- unsigned int geomIDs[N];
- unsigned int num_geomIDs = 1;
- geomIDs[0] = prims[start].geomID();
-
- for (size_t i=1;i<items;i++)
- {
- bool found = false;
- const unsigned int new_geomID = prims[start+i].geomID();
- for (size_t j=0;j<num_geomIDs;j++)
- if (new_geomID == geomIDs[j])
- { found = true; break; }
- if (!found)
- geomIDs[num_geomIDs++] = new_geomID;
- }
-
- /* allocate all leaf memory in one single block */
- SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
- typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
-
- LBBox3fa allBounds = empty;
-
- /* write one leaf block per distinct geometry */
- for (size_t g=0;g<num_geomIDs;g++)
- {
- const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
-
- unsigned int x[N];
- unsigned int y[N];
- unsigned int primID[N];
- BBox3fa bounds0[N];
- BBox3fa bounds1[N];
- unsigned int pos = 0;
- for (size_t i=0;i<items;i++)
- {
- if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
-
- const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
- x[pos] = sgrid_bd.sx;
- y[pos] = sgrid_bd.sy;
- primID[pos] = sgrid_bd.primID;
- /* from here on 'x' and 'y' name these scalars, shadowing the arrays declared above */
- const size_t x = sgrid_bd.x();
- const size_t y = sgrid_bd.y();
- bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
- bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
- assert(valid0);
- assert(valid1);
- allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
- pos++;
- }
- new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
- }
- return NodeRecordMB(node,allBounds);
- }
-
- Scene *scene;
- BVH* bvh;
- const SubGridBuildData * const sgrids;
- };
-#endif
-
-
- /* Motion blur BVH with 4D nodes and internal time splits */
- template<int N>
- struct BVHNBuilderMBlurSAHGrid : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
- typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
- typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
-
- BVH* bvh;
- Scene* scene;
- const size_t sahBlockSize;
- const float intCost;
- const size_t minLeafSize;
- const size_t maxLeafSize;
- mvector<SubGridBuildData> sgrids;
-
-
-      /*! Stores the build parameters. maxLeafSize is clamped to
-       *  BVH::maxLeafBlocks and the subgrid array starts out empty. */
-      BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
-        : bvh(bvh),
-          scene(scene),
-          sahBlockSize(sahBlockSize),
-          intCost(intCost),
-          minLeafSize(minLeafSize),
-          maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)),
-          sgrids(scene->device,0)
-      {
-      }
-
-
-      /*! Fills 'prims' and the member array 'sgrids' with one entry per subgrid
-       *  of every valid grid, using the subgrid bounds at time step 'itime'.
-       *  Returns the merged PrimInfo; when no subgrid is counted the arrays are
-       *  left untouched. 'progressMonitor' is not used here. */
-      PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
-      {
-        ParallelForForPrefixSumState<PrimInfo> pstate;
-        Scene::Iterator<GridMesh,true> iter(scene);
-        pstate.init(iter,size_t(1024));
-
-        /* pass 1: count the subgrids of all valid grids */
-        PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
-
-          PrimInfo counted(empty);
-          for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
-          {
-            if (mesh->valid(gridID,range<size_t>(0,1)))
-            {
-              /* only the count matters in this pass, so the bounds stay empty */
-              BBox3fa noBounds = empty;
-              const PrimRef placeholder(noBounds,unsigned(geomID),unsigned(gridID));
-              counted.add_center2(placeholder,mesh->getNumSubGrids(gridID));
-            }
-          }
-          return counted;
-        }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
-        const size_t numPrimitives = pinfo.size();
-        if (numPrimitives == 0) return pinfo;
-
-        /* make room for one entry per counted subgrid */
-        sgrids.resize(numPrimitives);
-        prims.resize(numPrimitives);
-
-        /* pass 2: write the primrefs and SubGridBuildData entries */
-        pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
-
-          /* this task writes starting at the prefix sum of the preceding tasks */
-          size_t writePos = base.size();
-          PrimInfo written(empty);
-          for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
-          {
-            const GridMesh::Grid &g = mesh->grid(gridID);
-            if (!mesh->valid(gridID,range<size_t>(0,1))) continue;
-
-            /* subgrids are visited in steps of 2 in both directions */
-            for (unsigned int y=0; y<g.resY-1u; y+=2)
-            {
-              for (unsigned int x=0; x<g.resX-1u; x+=2)
-              {
-                BBox3fa bounds = empty;
-                if (mesh->buildBounds(g,x,y,itime,bounds)) // get bounds of subgrid
-                {
-                  const PrimRef prim(bounds,unsigned(geomID),unsigned(writePos));
-                  written.add_center2(prim);
-                  sgrids[writePos] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(gridID));
-                  prims[writePos] = prim;
-                  writePos++;
-                }
-              }
-            }
-          }
-          return written;
-        }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
-        assert(pinfo.size() == numPrimitives);
-        return pinfo;
-      }
-
-      /*! Creates the PrimRefMB and SubGridBuildData arrays for all motion blur
-       *  grid meshes of the scene, with linear bounds taken over 't0t1'.
-       *
-       *  \param scene            scene whose GridMesh geometries are iterated
-       *  \param prims            output array, resized to the number of counted subgrids
-       *  \param progressMonitor  not used by this function
-       *  \param t0t1             time range the primitives are built for
-       *  \return merged PrimInfoMB of the generated primitives with time_range set
-       *          to t0t1; when nothing is counted, the result of the counting pass
-       *          is returned unchanged and no array is resized */
-      PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
-      {
-        /* first run to get #primitives */
-        ParallelForForPrefixSumState<PrimInfoMB> pstate;
-        Scene::Iterator<GridMesh,true> iter(scene);
-
-        pstate.init(iter,size_t(1024));
-        /* iterate over all meshes in the scene */
-        PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
-
-          /* the time segment range depends only on the mesh and t0t1, so query it once per task */
-          const auto timeSegments = mesh->timeSegmentRange(t0t1);
-
-          PrimInfoMB pinfoMB(empty);
-          for (size_t j=r.begin(); j<r.end(); j++)
-          {
-            if (!mesh->valid(j, timeSegments)) continue;
-            PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
-            pinfoMB.merge(gridMB);
-          }
-          return pinfoMB;
-        }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
-        size_t numPrimitives = pinfoMB.size();
-        if (numPrimitives == 0) return pinfoMB;
-
-        /* resize arrays */
-        sgrids.resize(numPrimitives);
-        prims.resize(numPrimitives);
-        /* second run to fill primrefs and SubGridBuildData arrays */
-        pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
-
-          const auto timeSegments = mesh->timeSegmentRange(t0t1);
-
-          /* this task writes starting at the prefix sum of the preceding tasks */
-          size_t p_index = base.size();
-          PrimInfoMB pinfoMB(empty);
-          for (size_t j=r.begin(); j<r.end(); j++)
-          {
-            if (!mesh->valid(j, timeSegments)) continue;
-            const GridMesh::Grid &g = mesh->grid(j);
-
-            for (unsigned int y=0; y<g.resY-1u; y+=2)
-              for (unsigned int x=0; x<g.resX-1u; x+=2)
-              {
-                /* NOTE(review): numTimeSegments() is passed as both the second and the fourth
-                   PrimRefMB argument, whereas GridRecalculatePrimRef passes
-                   timeSegmentRange(..).size() as the second one - confirm this is intended
-                   for a t0t1 other than the default */
-                const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
-                pinfoMB.add_primref(prim);
-                sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
-                prims[p_index++] = prim;
-              }
-          }
-          return pinfoMB;
-        }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
-        assert(pinfoMB.size() == numPrimitives);
-        pinfoMB.time_range = t0t1;
-        return pinfoMB;
-      }
-
-      /*! Builds the motion blur grid BVH of the scene, or clears the BVH when
-       *  the scene contains no motion blur grid primitives. */
-      void build()
-      {
-        const size_t numGridPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
-
-        /* empty scene: nothing to build */
-        if (numGridPrimitives == 0) {
-          bvh->clear();
-          return;
-        }
-
-        const double buildStart = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");
-
-        /* the dedicated single-segment path is disabled, so the multi-segment
-           builder is used regardless of the number of time segments */
-        buildMultiSegment(numGridPrimitives);
-
-        /* clear temporary data for static geometry */
-        bvh->cleanup();
-        bvh->postBuild(buildStart);
-      }
-
- /* NOTE: buildSingleSegment below is compiled out by the '#if 0' guard.
-    It builds the BVH with BVHBuilderBinnedSAH from the PrimRefs created by
-    createPrimRefArrayMBlurGrid for time step 0, creating leaves with
-    CreateLeafGridMB. */
-#if 0
- void buildSingleSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRef> prims(scene->device,numPrimitives);
- const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
-
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
- //TODO: check leaf_bytes
- const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
- (typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNodeMB::Create(),
- typename BVH::AABBNodeMB::Set(),
- CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
- bvh->scene->progressInterface,
- prims.data(),pinfo,settings);
-
- /* install the root with its linear bounds and the primitive count */
- bvh->set(root.ref,root.lbounds,pinfo.size());
- }
-#endif
-
-      /*! Builds the BVH with the BVHBuilderMSMBlur builder.
-       *  'numPrimitives' is used as the initial size of the PrimRefMB array and
-       *  for the leaf memory estimate. */
-      void buildMultiSegment(size_t numPrimitives)
-      {
-        /* generate the PrimRefMBs together with the matching SubGridBuildData entries */
-        mvector<PrimRefMB> prims(scene->device,numPrimitives);
-        PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);
-
-        /* no valid primitive: leave an empty BVH behind */
-        if (pinfo.size() == 0) {
-          bvh->clear();
-          return;
-        }
-
-        /* memory estimate handed to the allocator
-           NOTE(review): the leaf estimate uses sizeof(SubGridQBVHN<N>) although the leaves
-           written by CreateMSMBlurLeafGrid are SubGridMBQBVHN<N> - confirm the estimate */
-        const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
-        const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
-        const size_t estimated_bytes = node_bytes+leaf_bytes;
-        bvh->alloc.init_estimate(estimated_bytes);
-
-        /* builder configuration */
-        BVHBuilderMSMBlur::Settings settings;
-        settings.branchingFactor = N;
-        settings.maxDepth = BVH::maxDepth;
-        settings.logBlockSize = bsr(sahBlockSize);
-        settings.minLeafSize = min(minLeafSize,maxLeafSize);
-        settings.maxLeafSize = maxLeafSize;
-        settings.travCost = travCost;
-        settings.intCost = intCost;
-        settings.singleLeafTimeSegment = false;
-        settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),estimated_bytes);
-
-        /* created only after sgrids has been filled, so that data() points at the final array */
-        GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());
-
-        /* build the hierarchy and install its root */
-        auto root =
-          BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
-                                            recalculatePrimRef,
-                                            typename BVH::CreateAlloc(bvh),
-                                            typename BVH::AABBNodeMB4D::Create(),
-                                            typename BVH::AABBNodeMB4D::Set(),
-                                            CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
-                                            bvh->scene->progressInterface,
-                                            settings);
-        bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
-      }
-
-      /*! intentionally empty */
-      void clear()
-      {
-      }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
-    /* motion blur SAH scene builders for triangle meshes */
-    Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
-    }
-    Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
-    }
-#if defined(__AVX__)
-    Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
-    }
-    Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
-    }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
-    /* motion blur SAH scene builders for quad meshes */
-    Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH);
-    }
-#if defined(__AVX__)
-    Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH);
-    }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
-    /* motion blur SAH scene builders for user geometries; the leaf size limits come from the device */
-    Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      const int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
-      const int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
-      return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
-    }
-#if defined(__AVX__)
-    Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      const int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
-      const int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
-      return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
-    }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
-    /* motion blur SAH scene builders for instances; min and max leaf size are both 1 */
-    Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
-    {
-      return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
-    }
-#if defined(__AVX__)
-    Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
-    {
-      return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
-    }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_GRID)
-    /* motion blur SAH scene builders for grid meshes */
-    Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4);
-    }
-#if defined(__AVX__)
-    Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8);
-    }
-#endif
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp
deleted file mode 100644
index 285b38c39d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/primrefgen_presplit.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-
-namespace embree
-{
- namespace isa
- {
-    /*! Leaf creation functor of the spatial split builders: allocates the
-     *  primitive blocks for a range of PrimRefs and fills them one by one. */
-    template<int N, typename Primitive>
-    struct CreateLeafSpatial
-    {
-      typedef BVHN<N> BVH;
-      typedef typename BVH::NodeRef NodeRef;
-
-      __forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}
-
-      /*! creates the leaf for the primitives prims[set.begin() .. set.end()) */
-      __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
-      {
-        const size_t numBlocks = Primitive::blocks(set.size());
-        Primitive* leaf = (Primitive*) alloc.malloc1(numBlocks*sizeof(Primitive),BVH::byteAlignment);
-
-        /* 'cursor' is handed to fill() as a modifiable lvalue; presumably each
-           call advances it past the primitives it consumed - TODO confirm */
-        size_t cursor = set.begin();
-        for (size_t block=0; block<numBlocks; block++)
-          leaf[block].fill(prims,cursor,set.end(),bvh->scene);
-
-        return BVH::encodeLeaf((char*)leaf,numBlocks);
-      }
-
-      BVH* bvh;
-    };
-
-    /*! SAH BVH builder with spatial splits.
-     *
-     *  The builder works either on all geometries of type Mesh of a scene
-     *  (scene constructor, 'mesh' is null) or on a single mesh (mesh
-     *  constructor, 'scene' is null). Code shared by both modes must therefore
-     *  not dereference 'scene' unconditionally; in mesh mode the device and
-     *  scene are taken from the BVH instead. */
-    template<int N, typename Mesh, typename Primitive, typename Splitter>
-    struct BVHNBuilderFastSpatialSAH : public Builder
-    {
-      typedef BVHN<N> BVH;
-      typedef typename BVH::NodeRef NodeRef;
-      BVH* bvh;
-      Scene* scene;   //!< null when building a single mesh
-      Mesh* mesh;     //!< null when building a whole scene
-      mvector<PrimRef> prims0;
-      GeneralBVHBuilder::Settings settings;
-      const float splitFactor;
-      unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
-      unsigned int numPreviousPrimitives = 0;
-
-      /*! scene mode constructor */
-      BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
-        : bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
-          splitFactor(scene->device->max_spatial_split_replications) {}
-
-      /*! single mesh mode constructor; 'scene' is null here, so the split
-       *  factor is read through bvh->device (as prims0 already does) instead of
-       *  dereferencing the null scene member */
-      BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
-        : bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
-          splitFactor(bvh->device->max_spatial_split_replications), geomID_(geomID) {}
-
-      // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
-      /*! (Re-)builds the BVH. Clears the BVH and the primref array when there
-       *  are no primitives; otherwise runs either the presplit builder or the
-       *  spatial split builder and installs the resulting root. */
-      void build()
-      {
-        /* we reset the allocator when the mesh size changed */
-        if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
-          bvh->alloc.clear();
-        }
-
-        /* skip build for empty scene */
-        const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
-        numPreviousPrimitives = (unsigned int) numOriginalPrimitives;
-        if (numOriginalPrimitives == 0) {
-          prims0.clear();
-          bvh->clear();
-          return;
-        }
-
-        const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
-
-        /* 'scene' is null in single mesh mode, fall back to the device of the BVH */
-        const bool forcePreSplits = scene ? scene->device->useSpatialPreSplits : bvh->device->useSpatialPreSplits;
-        const bool usePreSplits = forcePreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
-        double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));
-
-        /* create primref array, with room for the replicated references */
-        const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
-        prims0.resize(numSplitPrimitives);
-
-        /* enable os_malloc for two level build */
-        if (mesh)
-          bvh->alloc.setOSallocation(true);
-
-        NodeRef root(0);
-        PrimInfo pinfo;
-
-
-        if (likely(usePreSplits))
-        {
-          /* spatial presplit SAH BVH builder */
-          pinfo = mesh ?
-            createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
-            createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);
-
-          const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
-          const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
-          bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-          settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
-          settings.branchingFactor = N;
-          settings.maxDepth = BVH::maxBuildDepthLeaf;
-
-          /* call BVH builder */
-          root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
-        }
-        else
-        {
-          /* standard spatial split SAH BVH builder */
-          pinfo = mesh ?
-            createPrimRefArray(mesh,geomID_,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface) :
-            createPrimRefArray(scene,Mesh::geom_type,false,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface);
-
-          /* never hand a null scene to the splitter: use the scene of the BVH in single mesh mode */
-          Splitter splitter(scene ? scene : bvh->scene);
-
-          const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
-          const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
-          bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-          settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
-          settings.branchingFactor = N;
-          settings.maxDepth = BVH::maxBuildDepthLeaf;
-
-          /* call BVH builder */
-          root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
-            typename BVH::CreateAlloc(bvh),
-            typename BVH::AABBNode::Create2(),
-            typename BVH::AABBNode::Set2(),
-            CreateLeafSpatial<N,Primitive>(bvh),
-            splitter,
-            bvh->scene->progressInterface,
-            prims0.data(),
-            numSplitPrimitives,
-            pinfo,settings);
-
-          /* ==================== */
-        }
-
-        bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
-        bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
-        /* clear temporary data for static geometry */
-        if (scene && scene->isStaticAccel()) {
-          prims0.clear();
-        }
-        bvh->cleanup();
-        bvh->postBuild(t0);
-      }
-
-      /*! releases the primref array */
-      void clear() {
-        prims0.clear();
-      }
-    };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
-
-    /* spatial split SAH scene builders for triangle meshes */
-    Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-    Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-    Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-
-#if defined(__AVX__)
-    Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-    Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
-    /* spatial split SAH scene builders for quad meshes */
-    Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-
-#if defined(__AVX__)
-    Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
-    {
-      return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode);
-    }
-#endif
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp
deleted file mode 100644
index 1a78f347ac..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_builder_twolevel.h"
-#include "bvh_statistics.h"
-#include "../builders/bvh_builder_sah.h"
-#include "../common/scene_line_segments.h"
-#include "../common/scene_triangle_mesh.h"
-#include "../common/scene_quad_mesh.h"
-
-#define PROFILE 0
-
-namespace embree
-{
- namespace isa
- {
-    /*! Stores the build parameters; the build reference and primitive
-     *  arrays start out empty. */
-    template<int N, typename Mesh, typename Primitive>
-    BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
-      : bvh(bvh),
-        scene(scene),
-        refs(scene->device,0),
-        prims(scene->device,0),
-        singleThreadThreshold(singleThreadThreshold),
-        gtype(gtype),
-        useMortonBuilder_(useMortonBuilder)
-    {
-    }
-
-    /*! the destructor body is intentionally empty */
-    template<int N, typename Mesh, typename Primitive>
-    BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel ()
-    {
-    }
-
- // ===========================================================================
- // ===========================================================================
- // ===========================================================================
-
- /* Builds the two level BVH:
-    1. releases the builders and object BVHs whose index is >= scene->size()
-    2. resets the allocator and returns with an empty BVH if there are no primitives
-    3. sets up one builder per supported mesh and lets each attach its build references
-    4. uses the single reference directly as root, or builds a top level
-       hierarchy over all references in which every leaf holds exactly one of them */
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
- {
- /* delete some objects */
- size_t num = scene->size();
- if (num < bvh->objects.size()) {
- parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- builders[i].reset();
- delete bvh->objects[i]; bvh->objects[i] = nullptr;
- }
- });
- }
-
- /* with PROFILE enabled the build below is repeated in an endless loop */
-#if PROFILE
- while(1)
-#endif
- {
- /* reset memory allocator */
- bvh->alloc.reset();
-
- /* skip build for empty scene */
- const size_t numPrimitives = scene->getNumPrimitives(gtype,false);
-
- if (numPrimitives == 0) {
- prims.resize(0);
- bvh->set(BVH::emptyNode,empty,0);
- return;
- }
-
- /* calculate the size of the entire BVH */
- const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
- const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
- const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");
-
- /* resize object array if scene got larger */
- if (bvh->objects.size() < num) bvh->objects.resize(num);
- if (builders.size() < num) builders.resize(num);
- resizeRefsList ();
- nextRef.store(0);
-
- /* create acceleration structures */
- parallel_for(size_t(0), num, [&] (const range<size_t>& r)
- {
- for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
- {
- Mesh* mesh = scene->getSafe<Mesh>(objectID);
-
- /* ignore meshes we do not support */
- if (mesh == nullptr || mesh->numTimeSteps != 1)
- continue;
-
- /* pick the builder setup based on isSmallGeometry() */
- if (isSmallGeometry(mesh)) {
- setupSmallBuildRefBuilder (objectID, mesh);
- } else {
- setupLargeBuildRefBuilder (objectID, mesh);
- }
- }
- });
-
- /* parallel build of acceleration structures */
- parallel_for(size_t(0), num, [&] (const range<size_t>& r)
- {
- for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
- {
- /* ignore if no triangle mesh or not enabled */
- Mesh* mesh = scene->getSafe<Mesh>(objectID);
- if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
- continue;
-
- builders[objectID]->attachBuildRefs (this);
- }
- });
-
-
-#if PROFILE
- double d0 = getSeconds();
-#endif
- /* fast path for single geometry scenes */
- if (nextRef == 1) {
- bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
- }
-
- else
- {
- /* open all large nodes */
- refs.resize(nextRef);
-
- /* this probably needs some more tuning */
- const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
-
-#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
-
-#if ENABLE_OPEN_SEQUENTIAL
- open_sequential(extSize);
-#endif
- /* compute PrimRefs */
- prims.resize(refs.size());
-#endif
-
- /* on this configuration the block below runs inside a task arena limited to at most 32 threads */
-#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL
- tbb::task_arena limited(min(32,(int)TaskScheduler::threadCount()));
- limited.execute([&]
-#endif
- {
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
-
- const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
-
- PrimInfo pinfo(empty);
- for (size_t i=r.begin(); i<r.end(); i++) {
- pinfo.add_center2(refs[i]);
- }
- return pinfo;
- }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
-
-#else
- const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
-
- PrimInfo pinfo(empty);
- for (size_t i=r.begin(); i<r.end(); i++) {
- pinfo.add_center2(refs[i]);
- /* the node reference is stored in the PrimRef and read back through ID() when the leaf is created */
- prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
- }
- return pinfo;
- }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
-#endif
-
- /* skip if all objects were empty */
- if (pinfo.size() == 0)
- bvh->set(BVH::emptyNode,empty,0);
-
- /* otherwise build toplevel hierarchy */
- else
- {
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(N);
- settings.minLeafSize = 1;
- settings.maxLeafSize = 1;
- settings.travCost = 1.0f;
- settings.intCost = 1.0f;
- settings.singleThreadThreshold = singleThreadThreshold;
-
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
-
- refs.resize(extSize);
-
- NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create2(),
- typename BVH::AABBNode::Set2(),
-
- /* leaf creation: a leaf is the node of its single build reference */
- [&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
- assert(range.size() == 1);
- return (NodeRef) refs[range.begin()].node;
- },
- [&] (BuildRef &bref, BuildRef *refs) -> size_t {
- return openBuildRef(bref,refs);
- },
- [&] (size_t dn) { bvh->scene->progressMonitor(0); },
- refs.data(),extSize,pinfo,settings);
-#else
- NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create2(),
- typename BVH::AABBNode::Set2(),
-
- /* leaf creation: a leaf is the node stored in the ID of its single PrimRef */
- [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
- assert(range.size() == 1);
- return (NodeRef) prims[range.begin()].ID();
- },
- [&] (size_t dn) { bvh->scene->progressMonitor(0); },
- prims.data(),pinfo,settings);
-#endif
-
-
- bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
- }
- }
-#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL
- );
-#endif
-
- }
-
- bvh->alloc.cleanup();
- bvh->postBuild(t0);
-#if PROFILE
- double d1 = getSeconds();
- std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
-#endif
- }
-
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
- {
- if (geomID >= bvh->objects.size()) return;
- if (builders[geomID]) builders[geomID].reset();
- delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr;
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
- {
- for (size_t i=0; i<bvh->objects.size(); i++)
- if (bvh->objects[i]) bvh->objects[i]->clear();
-
- for (size_t i=0; i<builders.size(); i++)
- if (builders[i]) builders[i].reset();
-
- refs.clear();
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
- {
- if (refs.size() == 0)
- return;
-
- refs.reserve(extSize);
-
-#if 1
- for (size_t i=0;i<refs.size();i++)
- {
- NodeRef ref = refs[i].node;
- if (ref.isAABBNode())
- BVH::prefetch(ref);
- }
-#endif
-
- std::make_heap(refs.begin(),refs.end());
- while (refs.size()+N-1 <= extSize)
- {
- std::pop_heap (refs.begin(),refs.end());
- NodeRef ref = refs.back().node;
- if (ref.isLeaf()) break;
- refs.pop_back();
-
- AABBNode* node = ref.getAABBNode();
- for (size_t i=0; i<N; i++) {
- if (node->child(i) == BVH::emptyNode) continue;
- refs.push_back(BuildRef(node->bounds(i),node->child(i)));
-
-#if 1
- NodeRef ref_pre = node->child(i);
- if (ref_pre.isAABBNode())
- ref_pre.prefetch();
-#endif
- std::push_heap (refs.begin(),refs.end());
- }
- }
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
- {
- if (builders[objectID] == nullptr || // new mesh
- dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change
- {
- builders[objectID].reset (new RefBuilderSmall(objectID));
- }
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
- {
- if (bvh->objects[objectID] == nullptr || // new mesh
- builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality
- dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change
- {
- Builder* builder = nullptr;
- delete bvh->objects[objectID];
- createMeshAccel(objectID, builder);
- builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
- }
- }
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
- }
-#endif
-
-#if defined(__AVX__)
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
- }
-#endif
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h
deleted file mode 100644
index 8f57c3b406..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include <type_traits>
-
-#include "bvh_builder_twolevel_internal.h"
-#include "bvh.h"
-#include "../common/primref.h"
-#include "../builders/priminfo.h"
-#include "../builders/primrefgen.h"
-
-/* new open/merge builder */
-#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
-#define ENABLE_OPEN_SEQUENTIAL 0
-#define SPLIT_MEMORY_RESERVE_FACTOR 1000
-#define SPLIT_MEMORY_RESERVE_SCALE 2
-#define SPLIT_MIN_EXT_SPACE 1000
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Mesh, typename Primitive>
- class BVHNBuilderTwoLevel : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline static bool isSmallGeometry(Mesh* mesh) {
- return mesh->size() <= 4;
- }
-
- public:
-
- typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
-
- struct BuildRef : public PrimRef
- {
- public:
- __forceinline BuildRef () {}
-
- __forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
- : PrimRef(bounds,(size_t)node), node(node)
- {
- if (node.isLeaf())
- bounds_area = 0.0f;
- else
- bounds_area = area(this->bounds());
- }
-
- /* used by the open/merge bvh builder */
- __forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
- : PrimRef(bounds,geomID,numPrimitives), node(node)
- {
- /* important for relative buildref ordering */
- if (node.isLeaf())
- bounds_area = 0.0f;
- else
- bounds_area = area(this->bounds());
- }
-
- __forceinline size_t size() const {
- return primID();
- }
-
- friend bool operator< (const BuildRef& a, const BuildRef& b) {
- return a.bounds_area < b.bounds_area;
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
- return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
- }
-
- __forceinline unsigned int numPrimitives() const { return primID(); }
-
- public:
- NodeRef node;
- float bounds_area;
- };
-
-
- __forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
- if (bref.node.isLeaf())
- {
- refs[0] = bref;
- return 1;
- }
- NodeRef ref = bref.node;
- unsigned int geomID = bref.geomID();
- unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
- AABBNode* node = ref.getAABBNode();
- size_t n = 0;
- for (size_t i=0; i<N; i++) {
- if (node->child(i) == BVH::emptyNode) continue;
- refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
- n++;
- }
- assert(n > 1);
- return n;
- }
-
- /*! Constructor. */
- BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);
-
- /*! Destructor */
- ~BVHNBuilderTwoLevel ();
-
- /*! builder entry point */
- void build();
- void deleteGeometry(size_t geomID);
- void clear();
-
- void open_sequential(const size_t extSize);
-
- private:
-
- class RefBuilderBase {
- public:
- virtual ~RefBuilderBase () {}
- virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
- virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
- };
-
- class RefBuilderSmall : public RefBuilderBase {
- public:
-
- RefBuilderSmall (size_t objectID)
- : objectID_ (objectID) {}
-
- void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) {
-
- Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
- size_t meshSize = mesh->size();
- assert(isSmallGeometry(mesh));
-
- mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
- auto pinfo = createPrimRefArray(mesh,objectID_,prefs,topBuilder->bvh->scene->progressInterface);
-
- size_t begin=0;
- while (begin < pinfo.size())
- {
- Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
- accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);
-
- /* create build primitive */
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
-#else
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
-#endif
- }
- assert(begin == pinfo.size());
- }
-
- bool meshQualityChanged (RTCBuildQuality /*currQuality*/) {
- return false;
- }
-
- size_t objectID_;
- };
-
- class RefBuilderLarge : public RefBuilderBase {
- public:
-
- RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
- : objectID_ (objectID), builder_ (builder), quality_ (quality) {}
-
- void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder)
- {
- BVH* object = topBuilder->getBVH(objectID_); assert(object);
-
- /* build object if it got modified */
- if (topBuilder->isGeometryModified(objectID_))
- builder_->build();
-
- /* create build primitive */
- if (!object->getBounds().empty())
- {
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
- Mesh* mesh = topBuilder->getMesh(objectID_);
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
-#else
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
-#endif
- }
- }
-
- bool meshQualityChanged (RTCBuildQuality currQuality) {
- return currQuality != quality_;
- }
-
- private:
- size_t objectID_;
- Ref<Builder> builder_;
- RTCBuildQuality quality_;
- };
-
- void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
- void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);
-
- BVH* getBVH (size_t objectID) {
- return this->bvh->objects[objectID];
- }
- Mesh* getMesh (size_t objectID) {
- return this->scene->template getSafe<Mesh>(objectID);
- }
- bool isGeometryModified (size_t objectID) {
- return this->scene->isGeometryModified(objectID);
- }
-
- void resizeRefsList ()
- {
- size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
- [this](const range<size_t>& r)->size_t {
- size_t c = 0;
- for (auto i=r.begin(); i<r.end(); ++i) {
- Mesh* mesh = scene->getSafe<Mesh>(i);
- if (mesh == nullptr || mesh->numTimeSteps != 1)
- continue;
- size_t meshSize = mesh->size();
- c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
- }
- return c;
- },
- std::plus<size_t>()
- );
-
- if (refs.size() < num) {
- refs.resize(num);
- }
- }
-
- void createMeshAccel (size_t geomID, Builder*& builder)
- {
- bvh->objects[geomID] = new BVH(Primitive::type,scene);
- BVH* accel = bvh->objects[geomID];
- auto mesh = scene->getSafe<Mesh>(geomID);
- if (nullptr == mesh) {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
- return;
- }
-
- __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
- }
-
- using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;
-
- BuilderList builders;
- BVH* bvh;
- Scene* scene;
- mvector<BuildRef> refs;
- mvector<PrimRef> prims;
- std::atomic<int> nextRef;
- const size_t singleThreadThreshold;
- Geometry::GTypeMask gtype;
- bool useMortonBuilder_ = false;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h
deleted file mode 100644
index 1c1ae8d6a7..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h
+++ /dev/null
@@ -1,267 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-namespace embree
-{
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
-
- namespace isa
- {
-
- namespace __internal_two_level_builder__ {
-
- template<int N, typename Mesh, typename Primitive>
- struct MortonBuilder {};
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4i> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,QuadMesh,Quad4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,UserGeometry,Object> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,Instance,InstancePrimitive> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4i> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,QuadMesh,Quad4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,UserGeometry,Object> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,Instance,InstancePrimitive> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct SAHBuilder {};
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4i> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,QuadMesh,Quad4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,UserGeometry,Object> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,Instance,InstancePrimitive> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4i> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,QuadMesh,Quad4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,UserGeometry,Object> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,Instance,InstancePrimitive> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct RefitBuilder {};
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4i> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,QuadMesh,Quad4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,UserGeometry,Object> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,Instance,InstancePrimitive> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4i> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,QuadMesh,Quad4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,UserGeometry,Object> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,Instance,InstancePrimitive> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct MeshBuilder {
- MeshBuilder () {}
- void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) {
- if(useMortonBuilder) {
- builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
- return;
- }
- switch (mesh->quality) {
- case RTC_BUILD_QUALITY_LOW: builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- case RTC_BUILD_QUALITY_MEDIUM:
- case RTC_BUILD_QUALITY_HIGH: builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- case RTC_BUILD_QUALITY_REFIT: builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- default: throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
- }
- }
- };
- }
- }
-} \ No newline at end of file
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp
deleted file mode 100644
index a27be8bae8..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_collider.h"
-#include "../geometry/triangle_triangle_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
-#define CSTAT(x)
-
- size_t parallel_depth_threshold = 3;
- CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
- CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
- CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
-
- struct Collision
- {
- __forceinline Collision() {}
-
- __forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
- : geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {}
-
- unsigned geomID0;
- unsigned primID0;
- unsigned geomID1;
- unsigned primID1;
- };
-
- template<int N>
- __forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- template<int N>
- __forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- template<int N>
- __forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
- {
- CSTAT(bvh_collide_prim_intersections1++);
- const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
- const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);
- const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
- const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);
-
- /* special culling for scene intersection with itself */
- if (scene0 == scene1 && geomID0 == geomID1)
- {
- /* ignore self intersections */
- if (primID0 == primID1)
- return false;
- }
- CSTAT(bvh_collide_prim_intersections2++);
-
- if (scene0 == scene1 && geomID0 == geomID1)
- {
- /* ignore intersection with topological neighbors */
- const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
- if (any(vint4(tri1.v[0]) == t0)) return false;
- if (any(vint4(tri1.v[1]) == t0)) return false;
- if (any(vint4(tri1.v[2]) == t0)) return false;
- }
- CSTAT(bvh_collide_prim_intersections3++);
-
- const Vec3fa a0 = mesh0->vertex(tri0.v[0]);
- const Vec3fa a1 = mesh0->vertex(tri0.v[1]);
- const Vec3fa a2 = mesh0->vertex(tri0.v[2]);
- const Vec3fa b0 = mesh1->vertex(tri1.v[0]);
- const Vec3fa b1 = mesh1->vertex(tri1.v[1]);
- const Vec3fa b2 = mesh1->vertex(tri1.v[2]);
-
- return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2);
- }
-
- template<int N>
- __forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
- {
- Collision collisions[16];
- size_t num_collisions = 0;
-
- size_t N0; Object* leaf0 = (Object*) node0.leaf(N0);
- size_t N1; Object* leaf1 = (Object*) node1.leaf(N1);
- for (size_t i=0; i<N0; i++) {
- for (size_t j=0; j<N1; j++) {
- const unsigned geomID0 = leaf0[i].geomID();
- const unsigned primID0 = leaf0[i].primID();
- const unsigned geomID1 = leaf1[j].geomID();
- const unsigned primID1 = leaf1[j].primID();
- if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue;
- collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1);
- if (num_collisions == 16) {
- this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
- num_collisions = 0;
- }
- }
- }
- if (num_collisions)
- this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
- }
-
- template<int N>
- void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
- {
- CSTAT(bvh_collide_traversal_steps++);
- if (unlikely(ref0.isLeaf())) {
- if (unlikely(ref1.isLeaf())) {
- CSTAT(bvh_collide_leaf_pairs++);
- processLeaf(ref0,ref1);
- return;
- } else goto recurse_node1;
-
- } else {
- if (unlikely(ref1.isLeaf())) {
- goto recurse_node0;
- } else {
- if (area(bounds0) > area(bounds1)) {
- goto recurse_node0;
- }
- else {
- goto recurse_node1;
- }
- }
- }
-
- {
- recurse_node0:
- AABBNode* node0 = ref0.getAABBNode();
- size_t mask = overlap<N>(bounds1,*node0);
- //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- //for (size_t i=0; i<N; i++) {
-#if 0
- if (depth0 < parallel_depth_threshold)
- {
- parallel_for(size_t(N), [&] ( size_t i ) {
- if (mask & ( 1 << i)) {
- BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
- }
- });
- }
- else
-#endif
- {
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
- }
- }
- return;
- }
-
- {
- recurse_node1:
- AABBNode* node1 = ref1.getAABBNode();
- size_t mask = overlap<N>(bounds0,*node1);
- //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- //for (size_t i=0; i<N; i++) {
-#if 0
- if (depth1 < parallel_depth_threshold)
- {
- parallel_for(size_t(N), [&] ( size_t i ) {
- if (mask & ( 1 << i)) {
- BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
- }
- });
- }
- else
-#endif
- {
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
- }
- }
- return;
- }
- }
-
- template<int N>
- void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
- {
- if (unlikely(job.ref0.isLeaf())) {
- if (unlikely(job.ref1.isLeaf())) {
- jobs.push_back(job);
- return;
- } else goto recurse_node1;
- } else {
- if (unlikely(job.ref1.isLeaf())) {
- goto recurse_node0;
- } else {
- if (area(job.bounds0) > area(job.bounds1)) {
- goto recurse_node0;
- }
- else {
- goto recurse_node1;
- }
- }
- }
-
- {
- recurse_node0:
- const AABBNode* node0 = job.ref0.getAABBNode();
- size_t mask = overlap<N>(job.bounds1,*node0);
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
- }
- return;
- }
-
- {
- recurse_node1:
- const AABBNode* node1 = job.ref1.getAABBNode();
- size_t mask = overlap<N>(job.bounds0,*node1);
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
- }
- return;
- }
- }
-
- template<int N>
- void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
- {
- CSTAT(bvh_collide_traversal_steps = 0);
- CSTAT(bvh_collide_leaf_pairs = 0);
- CSTAT(bvh_collide_leaf_iterations = 0);
- CSTAT(bvh_collide_prim_intersections1 = 0);
- CSTAT(bvh_collide_prim_intersections2 = 0);
- CSTAT(bvh_collide_prim_intersections3 = 0);
- CSTAT(bvh_collide_prim_intersections4 = 0);
- CSTAT(bvh_collide_prim_intersections5 = 0);
- CSTAT(bvh_collide_prim_intersections = 0);
-#if 0
- collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
-#else
- const int M = 2048;
- jobvector jobs[2];
- jobs[0].reserve(M);
- jobs[1].reserve(M);
- jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
- int source = 0;
- int target = 1;
-
- /* try to split job until job list is full */
- while (jobs[source].size()+8 <= M)
- {
- for (size_t i=0; i<jobs[source].size(); i++)
- {
- const CollideJob& job = jobs[source][i];
- size_t remaining = jobs[source].size()-i;
- if (jobs[target].size()+remaining+8 > M) {
- jobs[target].push_back(job);
- } else {
- split(job,jobs[target]);
- }
- }
-
- /* stop splitting jobs if we reached only leaves and cannot make progress anymore */
- if (jobs[target].size() == jobs[source].size())
- break;
-
- jobs[source].resize(0);
- std::swap(source,target);
- }
-
- /* parallel processing of all jobs */
- parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
- CollideJob& j = jobs[source][i];
- collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
- });
-
-
-#endif
- CSTAT(PRINT(bvh_collide_traversal_steps));
- CSTAT(PRINT(bvh_collide_leaf_pairs));
- CSTAT(PRINT(bvh_collide_leaf_iterations));
- CSTAT(PRINT(bvh_collide_prim_intersections1));
- CSTAT(PRINT(bvh_collide_prim_intersections2));
- CSTAT(PRINT(bvh_collide_prim_intersections3));
- CSTAT(PRINT(bvh_collide_prim_intersections4));
- CSTAT(PRINT(bvh_collide_prim_intersections5));
- CSTAT(PRINT(bvh_collide_prim_intersections));
- }
-
- template<int N>
- void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
- {
- BVHNColliderUserGeom<N>(bvh0->scene,bvh1->scene,callback,userPtr).
- collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds());
- }
-
-#if defined (EMBREE_LOWEST_ISA)
- struct collision_regression_test : public RegressionTest
- {
- collision_regression_test(const char* name) : RegressionTest(name) {
- registerRegressionTest(this);
- }
-
- bool run ()
- {
- bool passed = true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
- Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
- return passed;
- }
- };
-
- collision_regression_test collision_regression("collision_regression_test");
-#endif
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Collider Definitions
- ////////////////////////////////////////////////////////////////////////////////
-
- DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);
-
-#if defined(__AVX__)
- DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h
deleted file mode 100644
index ac4f99c96a..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/object.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNCollider
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
-
- struct CollideJob
- {
- CollideJob () {}
-
- CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
- NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
- : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}
-
- NodeRef ref0;
- BBox3fa bounds0;
- size_t depth0;
- NodeRef ref1;
- BBox3fa bounds1;
- size_t depth1;
- };
-
- typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;
-
- void split(const CollideJob& job, jobvector& jobs);
-
- public:
- __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
- : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}
-
- public:
- virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
- void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
- void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);
-
- protected:
- Scene* scene0;
- Scene* scene1;
- RTCCollideFunc callback;
- void* userPtr;
- };
-
- template<int N>
- class BVHNColliderUserGeom : public BVHNCollider<N>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
-
- __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
- : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}
-
- virtual void processLeaf(NodeRef leaf0, NodeRef leaf1);
- public:
- static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h
deleted file mode 100644
index 54021ca6eb..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../common/isa.h"
-#include "../common/accel.h"
-#include "../common/scene.h"
-#include "../geometry/curve_intersector_virtual.h"
-
-namespace embree
-{
- /*! BVH instantiations */
- class BVHFactory
- {
- public:
- enum class BuildVariant { STATIC, DYNAMIC, HIGH_QUALITY };
- enum class IntersectVariant { FAST, ROBUST };
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp
deleted file mode 100644
index ea6adc2717..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp
+++ /dev/null
@@ -1,330 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_intersector1.h"
-#include "node_intersector1.h"
-#include "bvh_traverser1.h"
-
-#include "../geometry/intersector_iterators.h"
-#include "../geometry/triangle_intersector.h"
-#include "../geometry/trianglev_intersector.h"
-#include "../geometry/trianglev_mb_intersector.h"
-#include "../geometry/trianglei_intersector.h"
-#include "../geometry/quadv_intersector.h"
-#include "../geometry/quadi_intersector.h"
-#include "../geometry/curveNv_intersector.h"
-#include "../geometry/curveNi_intersector.h"
-#include "../geometry/curveNi_mb_intersector.h"
-#include "../geometry/linei_intersector.h"
-#include "../geometry/subdivpatch1_intersector.h"
-#include "../geometry/object_intersector.h"
-#include "../geometry/instance_intersector.h"
-#include "../geometry/subgrid_intersector.h"
-#include "../geometry/subgrid_mb_intersector.h"
-#include "../geometry/curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, int types, bool robust, typename PrimitiveIntersector1>
- void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
- RayHit& __restrict__ ray,
- IntersectContext* __restrict__ context)
- {
- const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return;
-
- /* perform per ray precalculations required by the primitive intersector */
- Precalculations pre(ray, bvh);
-
- /* stack state */
- StackItemT<NodeRef> stack[stackSize]; // stack of nodes
- StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
- StackItemT<NodeRef>* stackEnd = stack+stackSize;
- stack[0].ptr = bvh->root;
- stack[0].dist = neg_inf;
-
- if (bvh->root == BVH::emptyNode)
- return;
-
- /* filter out invalid rays */
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- if (!ray.valid()) return;
-#endif
- /* verify correct input */
- assert(ray.valid());
- assert(ray.tnear() >= 0.0f);
- assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
-
- /* load the ray into SIMD registers */
- TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
-
- /* initialize the node traverser */
- BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser;
-
- /* pop loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
-
- /* if popped node is too far, pop next one */
-#if defined(__AVX512ER__)
- /* much faster on KNL */
- if (unlikely(any(vfloat<Nx>(*(float*)&stackPtr->dist) > tray.tfar)))
- continue;
-#else
- if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
- continue;
-#endif
-
- /* downtraversal loop */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<Nx> tNear;
- STAT3(normal.trav_nodes,1,1,1);
- bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
- if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
-
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
-
- /* select next child and push other children */
- nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
- }
-
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(normal.trav_leaves,1,1,1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);
- tray.tfar = ray.tfar;
-
- /* push lazy node onto stack */
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->dist = neg_inf;
- stackPtr++;
- }
- }
- }
-
- template<int N, int types, bool robust, typename PrimitiveIntersector1>
- void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
- Ray& __restrict__ ray,
- IntersectContext* __restrict__ context)
- {
- const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return;
-
- /* early out for already occluded rays */
- if (unlikely(ray.tfar < 0.0f))
- return;
-
- /* perform per ray precalculations required by the primitive intersector */
- Precalculations pre(ray, bvh);
-
- /* stack state */
- NodeRef stack[stackSize]; // stack of nodes that still need to get traversed
- NodeRef* stackPtr = stack+1; // current stack pointer
- NodeRef* stackEnd = stack+stackSize;
- stack[0] = bvh->root;
-
- /* filter out invalid rays */
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- if (!ray.valid()) return;
-#endif
-
- /* verify correct input */
- assert(ray.valid());
- assert(ray.tnear() >= 0.0f);
- assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
-
- /* load the ray into SIMD registers */
- TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
-
- /* initialize the node traverser */
- BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser;
-
- /* pop loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = (NodeRef)*stackPtr;
-
- /* downtraversal loop */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<Nx> tNear;
- STAT3(shadow.trav_nodes,1,1,1);
- bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
- if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
-
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
-
- /* select next child and push other children */
- nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
- }
-
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(shadow.trav_leaves,1,1,1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
- ray.tfar = neg_inf;
- break;
- }
-
- /* push lazy node onto stack */
- if (unlikely(lazy_node)) {
- *stackPtr = (NodeRef)lazy_node;
- stackPtr++;
- }
- }
- }
-
- template<int N, int types, bool robust, typename PrimitiveIntersector1>
- struct PointQueryDispatch
- {
- typedef typename PrimitiveIntersector1::Precalculations Precalculations;
- typedef typename PrimitiveIntersector1::Primitive Primitive;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
-
- static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
-
- /* right now AVX512KNL SIMD extension only for standard node types */
- static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N;
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
- {
- const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return false;
-
- /* stack state */
- StackItemT<NodeRef> stack[stackSize]; // stack of nodes
- StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
- StackItemT<NodeRef>* stackEnd = stack+stackSize;
- stack[0].ptr = bvh->root;
- stack[0].dist = neg_inf;
-
- /* verify correct input */
- assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));
-
- /* load the point query into SIMD registers */
- TravPointQuery<N> tquery(query->p, context->query_radius);
-
- /* initialize the node traverser */
- BVHNNodeTraverser1Hit<N, N, types> nodeTraverser;
-
- bool changed = false;
- float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
- ? query->radius * query->radius
- : dot(context->query_radius, context->query_radius);
-
- /* pop loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
-
- /* if popped node is too far, pop next one */
- if (unlikely(*(float*)&stackPtr->dist > cull_radius))
- continue;
-
- /* downtraversal loop */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<N> tNear;
- STAT3(point_query.trav_nodes,1,1,1);
- bool nodeIntersected;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
- } else {
- nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
- }
- if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; }
-
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
-
- /* select next child and push other children */
- nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
- }
-
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(point_query.trav_leaves,1,1,1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
- {
- changed = true;
- tquery.rad = context->query_radius;
- cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
- ? query->radius * query->radius
- : dot(context->query_radius, context->query_radius);
- }
-
- /* push lazy node onto stack */
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->dist = neg_inf;
- stackPtr++;
- }
- }
- return changed;
- }
- };
-
- /* disable point queries for not yet supported geometry types */
- template<int N, int types, bool robust>
- struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1> {
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
- };
-
- template<int N, int types, bool robust>
- struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1> {
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
- };
-
- template<int N, int types, bool robust>
- struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1> {
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
- };
-
- template<int N, int types, bool robust, typename PrimitiveIntersector1>
- bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
- const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
- {
- return PointQueryDispatch<N, types, robust, PrimitiveIntersector1>::pointQuery(This, query, context);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h
deleted file mode 100644
index 1a269c319a..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! BVH single ray intersector. */
- template<int N, int types, bool robust, typename PrimitiveIntersector1>
- class BVHNIntersector1
- {
- /* shortcuts for frequently used types */
- typedef typename PrimitiveIntersector1::Precalculations Precalculations;
- typedef typename PrimitiveIntersector1::Primitive Primitive;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
-
- static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
-
- /* right now AVX512KNL SIMD extension only for standard node types */
- static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N;
-
- public:
- static void intersect (const Accel::Intersectors* This, RayHit& ray, IntersectContext* context);
- static void occluded (const Accel::Intersectors* This, Ray& ray, IntersectContext* context);
- static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp
deleted file mode 100644
index 989f7354fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_intersector1.cpp"
-
-namespace embree
-{
- namespace isa
- {
- int getISA() {
- return VerifyMultiTargetLinking::getISA();
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// BVH4Intersector1 Definitions
- ////////////////////////////////////////////////////////////////////////////////
-
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));
-
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));
-
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
-
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
-
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
-
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));
-
- IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
- IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));
-
- IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
- IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));
-
- IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
- IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));
-
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
-
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
-
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
- //IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h
deleted file mode 100644
index d764cc928d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/stack_item.h"
-#include "node_intersector_frustum.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int K, bool robust>
- struct TravRayK;
-
- /*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional). */
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
- class BVHNIntersectorKHybrid
- {
- /* right now AVX512KNL SIMD extension only for standard node types */
- static const size_t Nx = types == BVH_AN1 ? vextend<N>::size : N;
-
- /* shortcuts for frequently used types */
- typedef typename PrimitiveIntersectorK::Precalculations Precalculations;
- typedef typename PrimitiveIntersectorK::Primitive Primitive;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::BaseNode BaseNode;
- typedef typename BVH::AABBNode AABBNode;
-
- static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
- static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;
-
- static const size_t switchThresholdIncoherent = \
- (K==4) ? 3 :
- (K==8) ? ((N==4) ? 5 : 7) :
- (K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
- 0;
-
- private:
- static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
- RayHitK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context);
- static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
- RayK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context);
-
- public:
- static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context);
- static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context);
-
- static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context);
- static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context);
-
- };
-
- /*! BVH packet intersector. */
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
- class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {};
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h
deleted file mode 100644
index 83d1fb4d3d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector_packet_stream.h"
-#include "node_intersector_frustum.h"
-#include "bvh_traverser_stream.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! BVH ray stream intersector. */
- template<int N, int Nx, int types, bool robust, typename PrimitiveIntersector>
- class BVHNIntersectorStream
- {
- static const int Nxd = (Nx == N) ? N : Nx/2;
-
- /* shortcuts for frequently used types */
- template<int K> using PrimitiveIntersectorK = typename PrimitiveIntersector::template Type<K>;
- template<int K> using PrimitiveK = typename PrimitiveIntersectorK<K>::PrimitiveK;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::BaseNode BaseNode;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
-
- template<int K>
- __forceinline static size_t initPacketsAndFrustum(RayK<K>** inputPackets, size_t numOctantRays,
- TravRayKStream<K, robust>* packets, Frustum<robust>& frustum, bool& commonOctant)
- {
- const size_t numPackets = (numOctantRays+K-1)/K;
-
- Vec3vf<K> tmp_min_rdir(pos_inf);
- Vec3vf<K> tmp_max_rdir(neg_inf);
- Vec3vf<K> tmp_min_org(pos_inf);
- Vec3vf<K> tmp_max_org(neg_inf);
- vfloat<K> tmp_min_dist(pos_inf);
- vfloat<K> tmp_max_dist(neg_inf);
-
- size_t m_active = 0;
- for (size_t i = 0; i < numPackets; i++)
- {
- const vfloat<K> tnear = inputPackets[i]->tnear();
- const vfloat<K> tfar = inputPackets[i]->tfar;
- vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f);
-
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- m_valid &= inputPackets[i]->valid();
-#endif
-
- m_active |= (size_t)movemask(m_valid) << (i*K);
-
- vfloat<K> packet_min_dist = max(tnear, 0.0f);
- vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf);
- tmp_min_dist = min(tmp_min_dist, packet_min_dist);
- tmp_max_dist = max(tmp_max_dist, packet_max_dist);
-
- const Vec3vf<K>& org = inputPackets[i]->org;
- const Vec3vf<K>& dir = inputPackets[i]->dir;
-
- new (&packets[i]) TravRayKStream<K, robust>(org, dir, packet_min_dist, packet_max_dist);
-
- tmp_min_rdir = min(tmp_min_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(pos_inf)));
- tmp_max_rdir = max(tmp_max_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(neg_inf)));
- tmp_min_org = min(tmp_min_org , select(m_valid,org , Vec3vf<K>(pos_inf)));
- tmp_max_org = max(tmp_max_org , select(m_valid,org , Vec3vf<K>(neg_inf)));
- }
-
- m_active &= (numOctantRays == (8 * sizeof(size_t))) ? (size_t)-1 : (((size_t)1 << numOctantRays)-1);
-
-
- const Vec3fa reduced_min_rdir(reduce_min(tmp_min_rdir.x),
- reduce_min(tmp_min_rdir.y),
- reduce_min(tmp_min_rdir.z));
-
- const Vec3fa reduced_max_rdir(reduce_max(tmp_max_rdir.x),
- reduce_max(tmp_max_rdir.y),
- reduce_max(tmp_max_rdir.z));
-
- const Vec3fa reduced_min_origin(reduce_min(tmp_min_org.x),
- reduce_min(tmp_min_org.y),
- reduce_min(tmp_min_org.z));
-
- const Vec3fa reduced_max_origin(reduce_max(tmp_max_org.x),
- reduce_max(tmp_max_org.y),
- reduce_max(tmp_max_org.z));
-
- commonOctant =
- (reduced_max_rdir.x < 0.0f || reduced_min_rdir.x >= 0.0f) &&
- (reduced_max_rdir.y < 0.0f || reduced_min_rdir.y >= 0.0f) &&
- (reduced_max_rdir.z < 0.0f || reduced_min_rdir.z >= 0.0f);
-
- const float frustum_min_dist = reduce_min(tmp_min_dist);
- const float frustum_max_dist = reduce_max(tmp_max_dist);
-
- frustum.init(reduced_min_origin, reduced_max_origin,
- reduced_min_rdir, reduced_max_rdir,
- frustum_min_dist, frustum_max_dist,
- N);
-
- return m_active;
- }
-
- template<int K>
- __forceinline static size_t intersectAABBNodePacket(size_t m_active,
- const TravRayKStream<K,robust>* packets,
- const AABBNode* __restrict__ node,
- size_t boxID,
- const NearFarPrecalculations& nf)
- {
- assert(m_active);
- const size_t startPacketID = bsf(m_active) / K;
- const size_t endPacketID = bsr(m_active) / K;
- size_t m_trav_active = 0;
- for (size_t i = startPacketID; i <= endPacketID; i++)
- {
- const size_t m_hit = intersectNodeK<N>(node, boxID, packets[i], nf);
- m_trav_active |= m_hit << (i*K);
- }
- return m_trav_active;
- }
-
- template<int K>
- __forceinline static size_t traverseCoherentStream(size_t m_active,
- TravRayKStream<K, robust>* packets,
- const AABBNode* __restrict__ node,
- const Frustum<robust>& frustum,
- size_t* maskK,
- vfloat<Nx>& dist)
- {
- size_t m_node_hit = intersectNodeFrustum<N,Nx>(node, frustum, dist);
- const size_t first_index = bsf(m_active);
- const size_t first_packetID = first_index / K;
- const size_t first_rayID = first_index % K;
- size_t m_first_hit = intersectNode1<N,Nx>(node, packets[first_packetID], first_rayID, frustum.nf);
-
- /* this make traversal independent of the ordering of rays */
- size_t m_node = m_node_hit ^ m_first_hit;
- while (unlikely(m_node))
- {
- const size_t boxID = bscf(m_node);
- const size_t m_current = m_active & intersectAABBNodePacket(m_active, packets, node, boxID, frustum.nf);
- m_node_hit ^= m_current ? (size_t)0 : ((size_t)1 << boxID);
- maskK[boxID] = m_current;
- }
- return m_node_hit;
- }
-
- // TODO: explicit 16-wide path for KNL
- template<int K>
- __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active,
- TravRayKStreamFast<K>* __restrict__ packets,
- const AABBNode* __restrict__ node,
- const NearFarPrecalculations& nf,
- const int shiftTable[32])
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
- assert(m_active);
- vint<Nx> vmask(zero);
- do
- {
- STAT3(shadow.trav_nodes,1,1,1);
- const size_t rayID = bscf(m_active);
- assert(rayID < MAX_INTERNAL_STREAM_SIZE);
- TravRayKStream<K,robust> &p = packets[rayID / K];
- const size_t i = rayID % K;
- const vint<Nx> bitmask(shiftTable[rayID]);
-
-#if defined (__aarch64__)
- const vfloat<Nx> tNearX = madd(bminX, p.rdir.x[i], p.neg_org_rdir.x[i]);
- const vfloat<Nx> tNearY = madd(bminY, p.rdir.y[i], p.neg_org_rdir.y[i]);
- const vfloat<Nx> tNearZ = madd(bminZ, p.rdir.z[i], p.neg_org_rdir.z[i]);
- const vfloat<Nx> tFarX = madd(bmaxX, p.rdir.x[i], p.neg_org_rdir.x[i]);
- const vfloat<Nx> tFarY = madd(bmaxY, p.rdir.y[i], p.neg_org_rdir.y[i]);
- const vfloat<Nx> tFarZ = madd(bmaxZ, p.rdir.z[i], p.neg_org_rdir.z[i]);
-#else
- const vfloat<Nx> tNearX = msub(bminX, p.rdir.x[i], p.org_rdir.x[i]);
- const vfloat<Nx> tNearY = msub(bminY, p.rdir.y[i], p.org_rdir.y[i]);
- const vfloat<Nx> tNearZ = msub(bminZ, p.rdir.z[i], p.org_rdir.z[i]);
- const vfloat<Nx> tFarX = msub(bmaxX, p.rdir.x[i], p.org_rdir.x[i]);
- const vfloat<Nx> tFarY = msub(bmaxY, p.rdir.y[i], p.org_rdir.y[i]);
- const vfloat<Nx> tFarZ = msub(bmaxZ, p.rdir.z[i], p.org_rdir.z[i]);
-#endif
-
- const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i]));
- const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i]));
-
-#if defined(__AVX512ER__)
- const vboolx m_node((1 << N)-1);
- const vbool<Nx> hit_mask = le(m_node, tNear, tFar);
- vmask = mask_or(hit_mask, vmask, vmask, bitmask);
-#else
- const vbool<Nx> hit_mask = tNear <= tFar;
-#if defined(__AVX2__)
- vmask = vmask | (bitmask & vint<Nx>(hit_mask));
-#else
- vmask = select(hit_mask, vmask | bitmask, vmask);
-#endif
-#endif
- } while(m_active);
- return vmask;
- }
-
- template<int K>
- __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active,
- TravRayKStreamRobust<K>* __restrict__ packets,
- const AABBNode* __restrict__ node,
- const NearFarPrecalculations& nf,
- const int shiftTable[32])
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
- assert(m_active);
- vint<Nx> vmask(zero);
- do
- {
- STAT3(shadow.trav_nodes,1,1,1);
- const size_t rayID = bscf(m_active);
- assert(rayID < MAX_INTERNAL_STREAM_SIZE);
- TravRayKStream<K,robust> &p = packets[rayID / K];
- const size_t i = rayID % K;
- const vint<Nx> bitmask(shiftTable[rayID]);
- const vfloat<Nx> tNearX = (bminX - p.org.x[i]) * p.rdir.x[i];
- const vfloat<Nx> tNearY = (bminY - p.org.y[i]) * p.rdir.y[i];
- const vfloat<Nx> tNearZ = (bminZ - p.org.z[i]) * p.rdir.z[i];
- const vfloat<Nx> tFarX = (bmaxX - p.org.x[i]) * p.rdir.x[i];
- const vfloat<Nx> tFarY = (bmaxY - p.org.y[i]) * p.rdir.y[i];
- const vfloat<Nx> tFarZ = (bmaxZ - p.org.z[i]) * p.rdir.z[i];
- const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i]));
- const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i]));
- const float round_down = 1.0f-2.0f*float(ulp);
- const float round_up = 1.0f+2.0f*float(ulp);
-#if defined(__AVX512ER__)
- const vboolx m_node((1 << N)-1);
- const vbool<Nx> hit_mask = le(m_node, round_down*tNear, round_up*tFar);
- vmask = mask_or(hit_mask, vmask, vmask, bitmask);
-#else
- const vbool<Nx> hit_mask = round_down*tNear <= round_up*tFar;
-#if defined(__AVX2__)
- vmask = vmask | (bitmask & vint<Nx>(hit_mask));
-#else
- vmask = select(hit_mask, vmask | bitmask, vmask);
-#endif
-#endif
- } while(m_active);
- return vmask;
- }
-
-
- static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth;
-
- public:
- static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context);
- static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context);
-
- private:
- template<int K>
- static void intersectCoherent(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedCoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedIncoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
- };
-
-
- /*! BVH ray stream intersector with direct fallback to packets. */
- template<int N, int Nx>
- class BVHNIntersectorStreamPacketFallback
- {
- public:
- static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context);
- static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context);
-
- private:
- template<int K>
- static void intersectK(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedK(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h
deleted file mode 100644
index cdeb923637..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/ray.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- namespace isa
- {
- class RayStreamFilter
- {
- public:
- static void intersectAOS(Scene* scene, RTCRayHit* rays, size_t N, size_t stride, IntersectContext* context);
- static void intersectAOP(Scene* scene, RTCRayHit** rays, size_t N, IntersectContext* context);
- static void intersectSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
- static void intersectSOP(Scene* scene, const RTCRayHitNp* rays, size_t N, IntersectContext* context);
-
- static void occludedAOS(Scene* scene, RTCRay* rays, size_t N, size_t stride, IntersectContext* context);
- static void occludedAOP(Scene* scene, RTCRay** rays, size_t N, IntersectContext* context);
- static void occludedSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
- static void occludedSOP(Scene* scene, const RTCRayNp* rays, size_t N, IntersectContext* context);
-
- private:
- template<int K, bool intersect>
- static void filterAOS(Scene* scene, void* rays, size_t N, size_t stride, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterAOP(Scene* scene, void** rays, size_t N, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterSOP(Scene* scene, const void* rays, size_t N, IntersectContext* context);
- };
- }
-};
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h
deleted file mode 100644
index baa4a8d805..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h
+++ /dev/null
@@ -1,213 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! BVHN AABBNode */
- template<typename NodeRef, int N>
- struct AABBNode_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
- {
- AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
- node.getAABBNode()->setRef(i,child);
- node.getAABBNode()->setBounds(i,bounds);
- }
- };
-
- struct Create2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
- {
- AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear();
- for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds());
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
- {
- AABBNode_t* node = ref.getAABBNode();
- for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
- return ref;
- }
- };
-
- struct Set3
- {
- Set3 (FastAllocator* allocator, PrimRef* prims)
- : allocator(allocator), prims(prims) {}
-
- template<typename BuildRecord>
- __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
- {
- AABBNode_t* node = ref.getAABBNode();
- for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
-
- if (unlikely(precord.alloc_barrier))
- {
- PrimRef* begin = &prims[precord.prims.begin()];
- PrimRef* end = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!!
- size_t bytes = (size_t)end - (size_t)begin;
- allocator->addBlock(begin,bytes);
- }
-
- return ref;
- }
-
- FastAllocator* const allocator;
- PrimRef* const prims;
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_x = lower_y = lower_z = pos_inf;
- upper_x = upper_y = upper_z = neg_inf;
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const BBox3fa& bounds)
- {
- assert(i < N);
- lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
- upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
- setBounds(i,bounds);
- children[i] = ref;
- }
-
- /*! Returns bounds of node. */
- __forceinline BBox3fa bounds() const {
- const Vec3fa lower(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z));
- const Vec3fa upper(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z));
- return BBox3fa(lower,upper);
- }
-
- /*! Returns bounds of specified child. */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]);
- const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]);
- return BBox3fa(lower,upper);
- }
-
- /*! Returns extent of bounds of specified child. */
- __forceinline Vec3fa extend(size_t i) const {
- return bounds(i).size();
- }
-
- /*! Returns bounds of all children (implemented later as specializations) */
- __forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;
-
- /*! swap two children of the node */
- __forceinline void swap(size_t i, size_t j)
- {
- assert(i<N && j<N);
- std::swap(children[i],children[j]);
- std::swap(lower_x[i],lower_x[j]);
- std::swap(lower_y[i],lower_y[j]);
- std::swap(lower_z[i],lower_z[j]);
- std::swap(upper_x[i],upper_x[j]);
- std::swap(upper_y[i],upper_y[j]);
- std::swap(upper_z[i],upper_z[j]);
- }
-
- /*! swap the children of two nodes */
- __forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
- {
- assert(i<N && j<N);
- std::swap(a->children[i],b->children[j]);
- std::swap(a->lower_x[i],b->lower_x[j]);
- std::swap(a->lower_y[i],b->lower_y[j]);
- std::swap(a->lower_z[i],b->lower_z[j]);
- std::swap(a->upper_x[i],b->upper_x[j]);
- std::swap(a->upper_y[i],b->upper_y[j]);
- std::swap(a->upper_z[i],b->upper_z[j]);
- }
-
- /*! compacts a node (moves empty children to the end) */
- __forceinline static void compact(AABBNode_t* a)
- {
- /* find right most filled node */
- ssize_t j=N;
- for (j=j-1; j>=0; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
-
- /* replace empty nodes with filled nodes */
- for (ssize_t i=0; i<j; i++) {
- if (a->child(i) == NodeRef::emptyNode) {
- a->swap(i,j);
- for (j=j-1; j>i; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
- }
- }
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! output operator */
- friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
- {
- o << "AABBNode { " << embree_endl;
- o << " lower_x " << n.lower_x << embree_endl;
- o << " upper_x " << n.upper_x << embree_endl;
- o << " lower_y " << n.lower_y << embree_endl;
- o << " upper_y " << n.upper_y << embree_endl;
- o << " lower_z " << n.lower_z << embree_endl;
- o << " upper_z " << n.upper_z << embree_endl;
- o << " children = ";
- for (size_t i=0; i<N; i++) o << n.children[i] << " ";
- o << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
-
- public:
- vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
- };
-
- template<>
- __forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
- transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
- transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h
deleted file mode 100644
index 501f4bce5b..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! Motion Blur AABBNode */
- template<typename NodeRef, int N>
- struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- struct Create
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
- {
- AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- template<typename BuildRecord>
- __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
- {
- AABBNodeMB_t* node = ref.getAABBNodeMB();
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<num; i++) {
- node->setRef(i,children[i].ref);
- node->setBounds(i,children[i].lbounds);
- bounds.extend(children[i].lbounds);
- }
- return NodeRecordMB(ref,bounds);
- }
- };
-
- struct SetTimeRange
- {
- __forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}
-
- template<typename BuildRecord>
- __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
- {
- AABBNodeMB_t* node = ref.getAABBNodeMB();
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<num; i++) {
- node->setRef(i, children[i].ref);
- node->setBounds(i, children[i].lbounds, tbounds);
- bounds.extend(children[i].lbounds);
- }
- return NodeRecordMB(ref,bounds);
- }
-
- BBox1f tbounds;
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
- upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
- lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
- upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, NodeRef ref) {
- children[i] = ref;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
- {
- /*! for empty bounds we have to avoid inf-inf=nan */
- BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
- BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));
- bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
- bounds1 = bounds1.enlarge_by(4.0f*float(ulp));
- Vec3fa dlower = bounds1.lower-bounds0.lower;
- Vec3fa dupper = bounds1.upper-bounds0.upper;
-
- lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
- upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
-
- lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
- upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
- setBounds(i, bounds.bounds0, bounds.bounds1);
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
- setBounds(i, bounds.global(tbounds));
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
- lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
- upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
- children[i] = ref;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRecordMB4D& child)
- {
- setRef(i, child.ref);
- setBounds(i, child.lbounds, child.dt);
- }
-
- /*! Return bounding box for time 0 */
- __forceinline BBox3fa bounds0(size_t i) const {
- return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
- Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
- }
-
- /*! Return bounding box for time 1 */
- __forceinline BBox3fa bounds1(size_t i) const {
- return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
- Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
- }
-
- /*! Returns bounds of node. */
- __forceinline BBox3fa bounds() const {
- return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
- reduce_min(min(lower_y,lower_y+lower_dy)),
- reduce_min(min(lower_z,lower_z+lower_dz))),
- Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
- reduce_max(max(upper_y,upper_y+upper_dy)),
- reduce_max(max(upper_z,upper_z+upper_dz))));
- }
-
- /*! Return bounding box of child i */
- __forceinline BBox3fa bounds(size_t i) const {
- return merge(bounds0(i),bounds1(i));
- }
-
- /*! Return linear bounding box of child i */
- __forceinline LBBox3fa lbounds(size_t i) const {
- return LBBox3fa(bounds0(i),bounds1(i));
- }
-
- /*! Return bounding box of child i at specified time */
- __forceinline BBox3fa bounds(size_t i, float time) const {
- return lerp(bounds0(i),bounds1(i),time);
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i) const {
- return lbounds(i).expectedHalfArea();
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
- return lbounds(i).expectedHalfArea(t0t1);
- }
-
- /*! swap two children of the node */
- __forceinline void swap(size_t i, size_t j)
- {
- assert(i<N && j<N);
- std::swap(children[i],children[j]);
-
- std::swap(lower_x[i],lower_x[j]);
- std::swap(upper_x[i],upper_x[j]);
- std::swap(lower_y[i],lower_y[j]);
- std::swap(upper_y[i],upper_y[j]);
- std::swap(lower_z[i],lower_z[j]);
- std::swap(upper_z[i],upper_z[j]);
-
- std::swap(lower_dx[i],lower_dx[j]);
- std::swap(upper_dx[i],upper_dx[j]);
- std::swap(lower_dy[i],lower_dy[j]);
- std::swap(upper_dy[i],upper_dy[j]);
- std::swap(lower_dz[i],lower_dz[j]);
- std::swap(upper_dz[i],upper_dz[j]);
- }
-
- /*! compacts a node (moves empty children to the end) */
- __forceinline static void compact(AABBNodeMB_t* a)
- {
- /* find right most filled node */
- ssize_t j=N;
- for (j=j-1; j>=0; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
-
- /* replace empty nodes with filled nodes */
- for (ssize_t i=0; i<j; i++) {
- if (a->child(i) == NodeRef::emptyNode) {
- a->swap(i,j);
- for (j=j-1; j>i; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
- }
- }
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! stream output operator */
- friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
- {
- cout << "AABBNodeMB {" << embree_endl;
- for (size_t i=0; i<N; i++)
- {
- const BBox3fa b0 = n.bounds0(i);
- const BBox3fa b1 = n.bounds1(i);
- cout << " child" << i << " { " << embree_endl;
- cout << " bounds0 = " << b0 << ", " << embree_endl;
- cout << " bounds1 = " << b1 << ", " << embree_endl;
- cout << " }";
- }
- cout << "}";
- return cout;
- }
-
- public:
- vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
-
- vfloat<N> lower_dx; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_dx; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_dy; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_dy; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_dz; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_dz; //!< Z dimension of upper bounds of all N children.
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h
deleted file mode 100644
index e968bbbc39..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_aabb_mb.h"
-
-namespace embree
-{
- /*! Aligned 4D Motion Blur Node */
- template<typename NodeRef, int N>
- struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
- using AABBNodeMB_t<NodeRef,N>::set;
-
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- struct Create
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
- {
- if (hasTimeSplits)
- {
- AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- else
- {
- AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- }
- };
-
- struct Set
- {
- template<typename BuildRecord>
- __forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
- {
- if (likely(ref.isAABBNodeMB())) {
- for (size_t i=0; i<num; i++)
- ref.getAABBNodeMB()->set(i, children[i]);
- } else {
- for (size_t i=0; i<num; i++)
- ref.getAABBNodeMB4D()->set(i, children[i]);
- }
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_t = vfloat<N>(pos_inf);
- upper_t = vfloat<N>(neg_inf);
- AABBNodeMB_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
- {
- AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
- lower_t[i] = tbounds.lower;
- upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRecordMB4D& child) {
- AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
- setBounds(i, child.lbounds, child.dt);
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i) const {
- return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i));
- }
-
- /*! returns time range for specified child */
- __forceinline BBox1f timeRange(size_t i) const {
- return BBox1f(lower_t[i],upper_t[i]);
- }
-
- /*! stream output operator */
- friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
- {
- cout << "AABBNodeMB4D {" << embree_endl;
- for (size_t i=0; i<N; i++)
- {
- const BBox3fa b0 = n.bounds0(i);
- const BBox3fa b1 = n.bounds1(i);
- cout << " child" << i << " { " << embree_endl;
- cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl;
- cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl;
- cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl;
- cout << " }";
- }
- cout << "}";
- return cout;
- }
-
- public:
- vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
- vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h
deleted file mode 100644
index 8268f3b932..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_ref.h"
-
-namespace embree
-{
-
- /*! BVHN Base Node */
- template<typename NodeRef, int N>
- struct BaseNode_t
- {
- /*! Clears the node. */
- __forceinline void clear()
- {
- for (size_t i=0; i<N; i++)
- children[i] = NodeRef::emptyNode;
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! verifies the node */
- __forceinline bool verify() const
- {
- for (size_t i=0; i<N; i++) {
- if (child(i) == NodeRef::emptyNode) {
- for (; i<N; i++) {
- if (child(i) != NodeRef::emptyNode)
- return false;
- }
- break;
- }
- }
- return true;
- }
-
- NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf)
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h
deleted file mode 100644
index fa7cc08211..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! Node with unaligned bounds */
- template<typename NodeRef, int N>
- struct OBBNode_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
- {
- OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
- node.ungetAABBNode()->setRef(i,child);
- node.ungetAABBNode()->setBounds(i,bounds);
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear()
- {
- naabb.l.vx = Vec3fa(nan);
- naabb.l.vy = Vec3fa(nan);
- naabb.l.vz = Vec3fa(nan);
- naabb.p = Vec3fa(nan);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box. */
- __forceinline void setBounds(size_t i, const OBBox3fa& b)
- {
- assert(i < N);
-
- AffineSpace3fa space = b.space;
- space.p -= b.bounds.lower;
- space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space;
-
- naabb.l.vx.x[i] = space.l.vx.x;
- naabb.l.vx.y[i] = space.l.vx.y;
- naabb.l.vx.z[i] = space.l.vx.z;
-
- naabb.l.vy.x[i] = space.l.vy.x;
- naabb.l.vy.y[i] = space.l.vy.y;
- naabb.l.vy.z[i] = space.l.vy.z;
-
- naabb.l.vz.x[i] = space.l.vz.x;
- naabb.l.vz.y[i] = space.l.vz.y;
- naabb.l.vz.z[i] = space.l.vz.z;
-
- naabb.p.x[i] = space.p.x;
- naabb.p.y[i] = space.p.y;
- naabb.p.z[i] = space.p.z;
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Returns the extent of the bounds of the ith child */
- __forceinline Vec3fa extent(size_t i) const {
- assert(i<N);
- const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
- const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
- const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
- return rsqrt(vx*vx + vy*vy + vz*vz);
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! output operator */
- friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
- {
- o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
- return o;
- }
-
- public:
- AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h
deleted file mode 100644
index 834cf5ec28..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- template<typename NodeRef, int N>
- struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
- {
- OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const {
- node.ungetAABBNodeMB()->setRef(i,child);
- node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt));
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear()
- {
- space0 = one;
- //b0.lower = b0.upper = Vec3fa(nan);
- b1.lower = b1.upper = Vec3fa(nan);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets space and bounding boxes. */
- __forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
- setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
- }
-
- /*! Sets space and bounding boxes. */
- __forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
- {
- assert(i < N);
-
- AffineSpace3fa space = s0;
- space.p -= a.lower;
- Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower);
- space = AffineSpace3fa::scale(scale)*space;
- BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale);
- BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);
-
- space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
- space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
- space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
- space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z;
-
- /*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z;
- b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/
-
- b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
- b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Returns the extent of the bounds of the ith child */
- __forceinline Vec3fa extent0(size_t i) const {
- assert(i < N);
- const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
- const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
- const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
- return rsqrt(vx*vx + vy*vy + vz*vz);
- }
-
- public:
- AffineSpace3vf<N> space0;
- //BBox3vf<N> b0; // these are the unit bounds
- BBox3vf<N> b1;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h
deleted file mode 100644
index 5212821f3f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h
+++ /dev/null
@@ -1,265 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! BVHN Quantized Node */
- template<int N>
- struct __aligned(8) QuantizedBaseNode_t
- {
- typedef unsigned char T;
- static const T MIN_QUAN = 0;
- static const T MAX_QUAN = 255;
-
- /*! Clears the node. */
- __forceinline void clear() {
- for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
- for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
- }
-
- /*! Returns bounds of specified child. */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
- madd(scale.y,(float)lower_y[i],start.y),
- madd(scale.z,(float)lower_z[i],start.z));
- const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
- madd(scale.y,(float)upper_y[i],start.y),
- madd(scale.z,(float)upper_z[i],start.z));
- return BBox3fa(lower,upper);
- }
-
- /*! Returns extent of bounds of specified child. */
- __forceinline Vec3fa extent(size_t i) const {
- return bounds(i).size();
- }
-
- static __forceinline void init_dim(const vfloat<N> &lower,
- const vfloat<N> &upper,
- T lower_quant[N],
- T upper_quant[N],
- float &start,
- float &scale)
- {
- /* quantize bounds */
- const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
- const float minF = reduce_min(lower);
- const float maxF = reduce_max(upper);
- float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
- float decode_scale = diff / float(MAX_QUAN);
- if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
- assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
- const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
- vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
- vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
-
- /* lower/upper correction */
- vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
- vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
- ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
- iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
-
- /* disable invalid lanes */
- ilower = select(m_valid,ilower,MAX_QUAN);
- iupper = select(m_valid,iupper,MIN_QUAN);
-
- /* store as uchar to memory */
- vint<N>::store(lower_quant,ilower);
- vint<N>::store(upper_quant,iupper);
- start = minF;
- scale = decode_scale;
-
-#if defined(DEBUG)
- vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
- vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
- vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
- vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
- assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
- assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
-#endif
- }
-
- __forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
- {
- init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
- init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
- init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
- }
-
- __forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
-
-#if defined(__AVX512F__) // KNL
- __forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
-#endif
- __forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
-
- __forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
-
- __forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
-
- __forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
-
- __forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
-
- __forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
-
- template <int M>
- __forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
-
-#if defined(__AVX512F__)
- __forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
- __forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
- __forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
-#endif
-
- union {
- struct {
- T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
- T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
- T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
- T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
- T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
- T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
- };
- T all_planes[6*N];
- };
-
- Vec3f start;
- Vec3f scale;
-
- friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
- {
- o << "QuantizedBaseNode { " << embree_endl;
- o << " start " << n.start << embree_endl;
- o << " scale " << n.scale << embree_endl;
- o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
- o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
- o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
- o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
- o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
- o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
-
- };
-
- template<typename NodeRef, int N>
- struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
- {
- using BaseNode_t<NodeRef,N>::children;
- using QuantizedBaseNode_t<N>::lower_x;
- using QuantizedBaseNode_t<N>::upper_x;
- using QuantizedBaseNode_t<N>::lower_y;
- using QuantizedBaseNode_t<N>::upper_y;
- using QuantizedBaseNode_t<N>::lower_z;
- using QuantizedBaseNode_t<N>::upper_z;
- using QuantizedBaseNode_t<N>::start;
- using QuantizedBaseNode_t<N>::scale;
- using QuantizedBaseNode_t<N>::init_dim;
-
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- struct Create2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
- {
- __aligned(64) AABBNode_t<NodeRef,N> node;
- node.clear();
- for (size_t i=0; i<n; i++) {
- node.setBounds(i,children[i].bounds());
- }
- QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
- qnode->init(node);
-
- return (size_t)qnode | NodeRef::tyQuantizedNode;
- }
- };
-
- struct Set2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
- {
- QuantizedNode_t* node = ref.quantizedNode();
- for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
- return ref;
- }
- };
-
- __forceinline void init(AABBNode_t<NodeRef,N>& node)
- {
- for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;
- init_dim(node);
- }
-
- };
-
- /*! BVHN Quantized Node */
- template<int N>
- struct __aligned(8) QuantizedBaseNodeMB_t
- {
- QuantizedBaseNode_t<N> node0;
- QuantizedBaseNode_t<N> node1;
-
- /*! Clears the node. */
- __forceinline void clear() {
- node0.clear();
- node1.clear();
- }
-
- /*! Returns bounds of specified child. */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- BBox3fa bounds0 = node0.bounds(i);
- BBox3fa bounds1 = node1.bounds(i);
- bounds0.extend(bounds1);
- return bounds0;
- }
-
- /*! Returns extent of bounds of specified child. */
- __forceinline Vec3fa extent(size_t i) const {
- return bounds(i).size();
- }
-
- __forceinline vbool<N> validMask() const { return node0.validMask(); }
-
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
-
-
- template<int M>
- __forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
-
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h
deleted file mode 100644
index 0f6d4dac7e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/alloc.h"
-#include "../common/accel.h"
-#include "../common/device.h"
-#include "../common/scene.h"
-#include "../geometry/primitive.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- /* BVH node reference with bounds */
- template<typename NodeRef>
- struct BVHNodeRecord
- {
- __forceinline BVHNodeRecord() {}
- __forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds) : ref(ref), bounds((BBox3fx)bounds) {}
- __forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds) : ref(ref), bounds(bounds) {}
-
- NodeRef ref;
- BBox3fx bounds;
- };
-
- template<typename NodeRef>
- struct BVHNodeRecordMB
- {
- __forceinline BVHNodeRecordMB() {}
- __forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds) : ref(ref), lbounds(lbounds) {}
-
- NodeRef ref;
- LBBox3fa lbounds;
- };
-
- template<typename NodeRef>
- struct BVHNodeRecordMB4D
- {
- __forceinline BVHNodeRecordMB4D() {}
- __forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt) : ref(ref), lbounds(lbounds), dt(dt) {}
-
- NodeRef ref;
- LBBox3fa lbounds;
- BBox1f dt;
- };
-
- template<typename NodeRef, int N> struct BaseNode_t;
- template<typename NodeRef, int N> struct AABBNode_t;
- template<typename NodeRef, int N> struct AABBNodeMB_t;
- template<typename NodeRef, int N> struct AABBNodeMB4D_t;
- template<typename NodeRef, int N> struct OBBNode_t;
- template<typename NodeRef, int N> struct OBBNodeMB_t;
- template<typename NodeRef, int N> struct QuantizedNode_t;
- template<typename NodeRef, int N> struct QuantizedNodeMB_t;
-
- /*! Pointer that points to a node or a list of primitives */
- template<int N>
- struct NodeRefPtr
- {
- //template<int NN> friend class BVHN;
-
- /*! Number of bytes the nodes and primitives are minimally aligned to.*/
- static const size_t byteAlignment = 16;
- static const size_t byteNodeAlignment = 4*N;
-
- /*! highest address bit is used as barrier for some algorithms */
- static const size_t barrier_mask = (1LL << (8*sizeof(size_t)-1));
-
- /*! Masks the bits that store the number of items per leaf. */
- static const size_t align_mask = byteAlignment-1;
- static const size_t items_mask = byteAlignment-1;
-
- /*! different supported node types */
- static const size_t tyAABBNode = 0;
- static const size_t tyAABBNodeMB = 1;
- static const size_t tyAABBNodeMB4D = 6;
- static const size_t tyOBBNode = 2;
- static const size_t tyOBBNodeMB = 3;
- static const size_t tyQuantizedNode = 5;
- static const size_t tyLeaf = 8;
-
- /*! Empty node */
- static const size_t emptyNode = tyLeaf;
-
- /*! Invalid node, used as marker in traversal */
- static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
- static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);
-
- /*! Maximum number of primitive blocks in a leaf. */
- static const size_t maxLeafBlocks = items_mask-tyLeaf;
-
- /*! Default constructor */
- __forceinline NodeRefPtr () {}
-
- /*! Construction from integer */
- __forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}
-
- /*! Cast to size_t */
- __forceinline operator size_t() const { return ptr; }
-
- /*! Sets the barrier bit. */
- __forceinline void setBarrier() {
-#if defined(__X86_64__) || defined(__aarch64__)
- assert(!isBarrier());
- ptr |= barrier_mask;
-#else
- assert(false);
-#endif
- }
-
- /*! Clears the barrier bit. */
- __forceinline void clearBarrier() {
-#if defined(__X86_64__) || defined(__aarch64__)
- ptr &= ~barrier_mask;
-#else
- assert(false);
-#endif
- }
-
- /*! Checks if this is an barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
- __forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }
-
- /*! checks if this is a leaf */
- __forceinline size_t isLeaf() const { return ptr & tyLeaf; }
-
- /*! returns node type */
- __forceinline int type() const { return ptr & (size_t)align_mask; }
-
- /*! checks if this is a node */
- __forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }
-
- /*! checks if this is a motion blur node */
- __forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }
-
- /*! checks if this is a 4D motion blur node */
- __forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }
-
- /*! checks if this is a node with unaligned bounding boxes */
- __forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }
-
- /*! checks if this is a motion blur node with unaligned bounding boxes */
- __forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }
-
- /*! checks if this is a quantized node */
- __forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }
-
- /*! Encodes a node */
- static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
- assert(!((size_t)node & align_mask));
- return NodeRefPtr((size_t) node);
- }
-
- static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
- assert(!((size_t)node & align_mask));
- return NodeRefPtr((size_t) node | tyAABBNodeMB);
- }
-
- static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
- assert(!((size_t)node & align_mask));
- return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
- }
-
- /*! Encodes an unaligned node */
- static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
- return NodeRefPtr((size_t) node | tyOBBNode);
- }
-
- /*! Encodes an unaligned motion blur node */
- static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
- return NodeRefPtr((size_t) node | tyOBBNodeMB);
- }
-
- /*! Encodes a leaf */
- static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
- assert(!((size_t)tri & align_mask));
- assert(num <= maxLeafBlocks);
- return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
- }
-
- /*! Encodes a leaf */
- static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
- assert(!((size_t)ptr & align_mask));
- return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
- }
-
- /*! returns base node pointer */
- __forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
- {
- assert(!isLeaf());
- return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
- }
- __forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
- {
- assert(!isLeaf());
- return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
- }
-
- /*! returns node pointer */
- __forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; }
- __forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }
-
- /*! returns motion blur node pointer */
- __forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
- __forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
- /*! returns 4D motion blur node pointer */
- __forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
- __forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
- /*! returns unaligned node pointer */
- __forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
- __forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
- /*! returns unaligned motion blur node pointer */
- __forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
- __forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
- /*! returns quantized node pointer */
- __forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
- __forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
-
- /*! returns leaf pointer */
- __forceinline char* leaf(size_t& num) const {
- assert(isLeaf());
- num = (ptr & (size_t)items_mask)-tyLeaf;
- return (char*)(ptr & ~(size_t)align_mask);
- }
-
- /*! clear all bit flags */
- __forceinline void clearFlags() {
- ptr &= ~(size_t)align_mask;
- }
-
- /*! returns the wideness */
- __forceinline size_t getN() const { return N; }
-
- public:
- size_t ptr;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp
deleted file mode 100644
index a273c21e8b..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_refit.h"
-#include "bvh_statistics.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-namespace embree
-{
- namespace isa
- {
- static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
-
- template<int N>
- __forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
- {
- size_t sa = *(size_t*)&a->node()->lower_x;
- size_t sb = *(size_t*)&b->node()->lower_x;
- return sa < sb;
- }
-
- template<int N>
- BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
- : bvh(bvh), leafBounds(leafBounds), numSubTrees(0)
- {
- }
-
- template<int N>
- void BVHNRefitter<N>::refit()
- {
- if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) {
- bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
- }
- else
- {
- BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
- numSubTrees = 0;
- gather_subtree_refs(bvh->root,numSubTrees,0);
- if (numSubTrees)
- parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- NodeRef& ref = subTrees[i];
- subTreeBounds[i] = recurse_bottom(ref);
- }
- });
-
- numSubTrees = 0;
- bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
- }
- }
-
- template<int N>
- void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
- size_t &subtrees,
- const size_t depth)
- {
- if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
- {
- assert(subtrees < MAX_NUM_SUB_TREES);
- subTrees[subtrees++] = ref;
- return;
- }
-
- if (ref.isAABBNode())
- {
- AABBNode* node = ref.getAABBNode();
- for (size_t i=0; i<N; i++) {
- NodeRef& child = node->child(i);
- if (unlikely(child == BVH::emptyNode)) continue;
- gather_subtree_refs(child,subtrees,depth+1);
- }
- }
- }
-
- template<int N>
- BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
- size_t &subtrees,
- const BBox3fa *const subTreeBounds,
- const size_t depth)
- {
- if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
- {
- assert(subtrees < MAX_NUM_SUB_TREES);
- assert(subTrees[subtrees] == ref);
- return subTreeBounds[subtrees++];
- }
-
- if (ref.isAABBNode())
- {
- AABBNode* node = ref.getAABBNode();
- BBox3fa bounds[N];
-
- for (size_t i=0; i<N; i++)
- {
- NodeRef& child = node->child(i);
-
- if (unlikely(child == BVH::emptyNode))
- bounds[i] = BBox3fa(empty);
- else
- bounds[i] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
- }
-
- BBox3vf<N> boundsT = transpose<N>(bounds);
-
- /* set new bounds */
- node->lower_x = boundsT.lower.x;
- node->lower_y = boundsT.lower.y;
- node->lower_z = boundsT.lower.z;
- node->upper_x = boundsT.upper.x;
- node->upper_y = boundsT.upper.y;
- node->upper_z = boundsT.upper.z;
-
- return merge<N>(bounds);
- }
- else
- return leafBounds.leafBounds(ref);
- }
-
- // =========================================================
- // =========================================================
- // =========================================================
-
-
- template<int N>
- BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
- {
- /* this is a leaf node */
- if (unlikely(ref.isLeaf()))
- return leafBounds.leafBounds(ref);
-
- /* recurse if this is an internal node */
- AABBNode* node = ref.getAABBNode();
-
- /* enable exclusive prefetch for >= AVX platforms */
-#if defined(__AVX__)
- BVH::prefetchW(ref);
-#endif
- BBox3fa bounds[N];
-
- for (size_t i=0; i<N; i++)
- if (unlikely(node->child(i) == BVH::emptyNode))
- {
- bounds[i] = BBox3fa(empty);
- }
- else
- bounds[i] = recurse_bottom(node->child(i));
-
- /* AOS to SOA transform */
- BBox3vf<N> boundsT = transpose<N>(bounds);
-
- /* set new bounds */
- node->lower_x = boundsT.lower.x;
- node->lower_y = boundsT.lower.y;
- node->lower_z = boundsT.lower.z;
- node->upper_x = boundsT.upper.x;
- node->upper_y = boundsT.upper.y;
- node->upper_z = boundsT.upper.z;
-
- return merge<N>(bounds);
- }
-
- template<int N, typename Mesh, typename Primitive>
- BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
- : bvh(bvh), builder(builder), refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)), mesh(mesh), topologyVersion(0) {}
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNRefitT<N,Mesh,Primitive>::clear()
- {
- if (builder)
- builder->clear();
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNRefitT<N,Mesh,Primitive>::build()
- {
- if (mesh->topologyChanged(topologyVersion)) {
- topologyVersion = mesh->getTopologyVersion();
- builder->build();
- }
- else
- refitter->refit();
- }
-
- template class BVHNRefitter<4>;
-#if defined(__AVX__)
- template class BVHNRefitter<8>;
-#endif
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
-
- Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
-
- Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
- Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
- Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
-#endif
-#endif
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h
deleted file mode 100644
index 4aa9bdd7cc..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNRefitter
- {
- public:
-
- /*! Type shortcuts */
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
-
- struct LeafBoundsInterface {
- virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
- };
-
- public:
-
- /*! Constructor. */
- BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);
-
- /*! refits the BVH */
- void refit();
-
- private:
- /* single-threaded subtree extraction based on BVH depth */
- void gather_subtree_refs(NodeRef& ref,
- size_t &subtrees,
- const size_t depth = 0);
-
- /* single-threaded top-level refit */
- BBox3fa refit_toplevel(NodeRef& ref,
- size_t &subtrees,
- const BBox3fa *const subTreeBounds,
- const size_t depth = 0);
-
- /* single-threaded subtree refit */
- BBox3fa recurse_bottom(NodeRef& ref);
-
- public:
- BVH* bvh; //!< BVH to refit
- const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves
-
- static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3;
- static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
- size_t numSubTrees;
- NodeRef subTrees[MAX_NUM_SUB_TREES];
- };
-
- template<int N, typename Mesh, typename Primitive>
- class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
- {
- public:
-
- /*! Type shortcuts */
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
-
- public:
- BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);
-
- virtual void build();
-
- virtual void clear();
-
- virtual const BBox3fa leafBounds (NodeRef& ref) const
- {
- size_t num; char* prim = ref.leaf(num);
- if (unlikely(ref == BVH::emptyNode)) return empty;
-
- BBox3fa bounds = empty;
- for (size_t i=0; i<num; i++)
- bounds.extend(((Primitive*)prim)[i].update(mesh));
- return bounds;
- }
-
- private:
- BVH* bvh;
- std::unique_ptr<Builder> builder;
- std::unique_ptr<BVHNRefitter<N>> refitter;
- Mesh* mesh;
- unsigned int topologyVersion;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp
deleted file mode 100644
index 2bb431bf0e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_rotate.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Computes half surface area of box. */
- __forceinline float halfArea3f(const BBox<vfloat4>& box) {
- const vfloat4 d = box.size();
- const vfloat4 a = d*shuffle<1,2,0,3>(d);
- return a[0]+a[1]+a[2];
- }
-
- size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
- {
- /*! nothing to rotate if we reached a leaf node. */
- if (parentRef.isBarrier()) return 0;
- if (parentRef.isLeaf()) return 0;
- AABBNode* parent = parentRef.getAABBNode();
-
- /*! rotate all children first */
- vint4 cdepth;
- for (size_t c=0; c<4; c++)
- cdepth[c] = (int)rotate(parent->child(c),depth+1);
-
- /* compute current areas of all children */
- vfloat4 sizeX = parent->upper_x-parent->lower_x;
- vfloat4 sizeY = parent->upper_y-parent->lower_y;
- vfloat4 sizeZ = parent->upper_z-parent->lower_z;
- vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
-
- /*! get node bounds */
- BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
- parent->bounds(child1_0,child1_1,child1_2,child1_3);
-
- /*! Find best rotation. We pick a first child (child1) and a sub-child
- (child2child) of a different second child (child2), and swap child1
- and child2child. We perform the best such swap. */
- float bestArea = 0;
- size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
- for (size_t c2=0; c2<4; c2++)
- {
- /*! ignore leaf nodes as we cannot descent into them */
- if (parent->child(c2).isBarrier()) continue;
- if (parent->child(c2).isLeaf()) continue;
- AABBNode* child2 = parent->child(c2).getAABBNode();
-
- /*! transpose child bounds */
- BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
- child2->bounds(child2c0,child2c1,child2c2,child2c3);
-
- /*! put child1_0 at each child2 position */
- float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
- float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
- float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
- float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
- vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
- vfloat4 min0 = vreduce_min(cost0);
- int pos0 = (int)bsf(movemask(min0 == cost0));
-
- /*! put child1_1 at each child2 position */
- float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
- float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
- float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
- float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
- vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
- vfloat4 min1 = vreduce_min(cost1);
- int pos1 = (int)bsf(movemask(min1 == cost1));
-
- /*! put child1_2 at each child2 position */
- float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
- float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
- float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
- float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
- vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
- vfloat4 min2 = vreduce_min(cost2);
- int pos2 = (int)bsf(movemask(min2 == cost2));
-
- /*! put child1_3 at each child2 position */
- float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
- float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
- float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
- float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
- vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
- vfloat4 min3 = vreduce_min(cost3);
- int pos3 = (int)bsf(movemask(min3 == cost3));
-
- /*! find best other child */
- vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
- int pos[4] = { pos0,pos1,pos2,pos3 };
- const size_t mbd = BVH4::maxBuildDepth;
- vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
- valid &= vint4(int(c2)) != vint4(step);
- if (none(valid)) continue;
- size_t c1 = select_min(valid,area0123);
- float area = area0123[c1];
- if (c1 == c2) continue; // can happen if bounds are NANs
-
- /*! accept a swap when it reduces cost and is not swapping a node with itself */
- if (area < bestArea) {
- bestArea = area;
- bestChild1 = c1;
- bestChild2 = c2;
- bestChild2Child = pos[c1];
- }
- }
-
- /*! if we did not find a swap that improves the SAH then do nothing */
- if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
-
- /*! perform the best found tree rotation */
- AABBNode* child2 = parent->child(bestChild2).getAABBNode();
- AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
- parent->setBounds(bestChild2,child2->bounds());
- AABBNode::compact(parent);
- AABBNode::compact(child2);
-
- /*! This returned depth is conservative as the child that was
- * pulled up in the tree could have been on the critical path. */
- cdepth[bestChild1]++; // bestChild1 was pushed down one level
- return 1+reduce_max(cdepth);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h
deleted file mode 100644
index 009bef339e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNRotate
- {
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- public:
- static const bool enabled = false;
-
- static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; }
- static __forceinline void restructure(NodeRef ref, size_t depth = 1) {}
- };
-
- /* BVH4 tree rotations */
- template<>
- class BVHNRotate<4>
- {
- typedef BVH4::AABBNode AABBNode;
- typedef BVH4::NodeRef NodeRef;
-
- public:
- static const bool enabled = true;
-
- static size_t rotate(NodeRef parentRef, size_t depth = 1);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp
deleted file mode 100644
index aa56035026..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_statistics.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- template<int N>
- BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
- {
- double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
- stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f));
- }
-
- template<int N>
- std::string BVHNStatistics<N>::str()
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << " primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl;
- size_t totalBytes = stat.bytes(bvh);
- double totalSAH = stat.sah(bvh);
- stream << " total : sah = " << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), ";
- stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl;
- if (stat.statAABBNodes.numNodes ) stream << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statOBBNodes.numNodes ) stream << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statAABBNodesMB.numNodes ) stream << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statAABBNodesMB4D.numNodes) stream << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statOBBNodesMB.numNodes) stream << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statQuantizedNodes.numNodes ) stream << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (true) stream << " leaves : " << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (true) stream << " histogram : " << stat.statLeaf.histToString() << std::endl;
- return stream.str();
- }
-
- template<int N>
- typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
- {
- Statistics s;
- assert(t0t1.size() > 0.0f);
- double dt = max(0.0f,t0t1.size());
- if (node.isAABBNode())
- {
- AABBNode* n = node.getAABBNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extend(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statAABBNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodes.numNodes++;
- s.statAABBNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isOBBNode())
- {
- OBBNode* n = node.ungetAABBNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statOBBNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statOBBNodes.numNodes++;
- s.statOBBNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isAABBNodeMB())
- {
- AABBNodeMB* n = node.getAABBNodeMB();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statAABBNodesMB.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodesMB.numNodes++;
- s.statAABBNodesMB.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isAABBNodeMB4D())
- {
- AABBNodeMB4D* n = node.getAABBNodeMB4D();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
- assert(!t0t1i.empty());
- const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
- Statistics s = statistics(n->child(i),Ai,t0t1i);
- s.statAABBNodesMB4D.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodesMB4D.numNodes++;
- s.statAABBNodesMB4D.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isOBBNodeMB())
- {
- OBBNodeMB* n = node.ungetAABBNodeMB();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent0(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statOBBNodesMB.numChildren++;
- return s;
- }, Statistics::add);
- s.statOBBNodesMB.numNodes++;
- s.statOBBNodesMB.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isQuantizedNode())
- {
- QuantizedNode* n = node.quantizedNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statQuantizedNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statQuantizedNodes.numNodes++;
- s.statQuantizedNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isLeaf())
- {
- size_t num; const char* tri = node.leaf(num);
- if (num)
- {
- for (size_t i=0; i<num; i++)
- {
- const size_t bytes = bvh->primTy->getBytes(tri);
- s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
- s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
- s.statLeaf.numBytes += bytes;
- tri+=bytes;
- }
- s.statLeaf.numLeaves++;
- s.statLeaf.numPrimBlocks += num;
- s.statLeaf.leafSAH += dt*A*num;
- if (num-1 < Statistics::LeafStat::NHIST) {
- s.statLeaf.numPrimBlocksHistogram[num-1]++;
- }
- }
- }
- else {
- // -- GODOT start --
- // throw std::runtime_error("not supported node type in bvh_statistics");
- abort();
- // -- GODOT end --
- }
- return s;
- }
-
-#if defined(__AVX__)
- template class BVHNStatistics<8>;
-#endif
-
-#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
- template class BVHNStatistics<4>;
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h
deleted file mode 100644
index 73dfc6fbcc..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h
+++ /dev/null
@@ -1,285 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include <sstream>
-
-namespace embree
-{
- template<int N>
- class BVHNStatistics
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::OBBNode OBBNode;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
- typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
- typedef typename BVH::OBBNodeMB OBBNodeMB;
- typedef typename BVH::QuantizedNode QuantizedNode;
-
- typedef typename BVH::NodeRef NodeRef;
-
- struct Statistics
- {
- template<typename Node>
- struct NodeStat
- {
- NodeStat ( double nodeSAH = 0,
- size_t numNodes = 0,
- size_t numChildren = 0)
- : nodeSAH(nodeSAH),
- numNodes(numNodes),
- numChildren(numChildren) {}
-
- double sah(BVH* bvh) const {
- return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
- }
-
- size_t bytes() const {
- return numNodes*sizeof(Node);
- }
-
- size_t size() const {
- return numNodes;
- }
-
- double fillRateNom () const { return double(numChildren); }
- double fillRateDen () const { return double(numNodes*N); }
- double fillRate () const { return fillRateNom()/fillRateDen(); }
-
- __forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
- {
- return NodeStat(a.nodeSAH + b.nodeSAH,
- a.numNodes+b.numNodes,
- a.numChildren+b.numChildren);
- }
-
- std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
- stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB ";
- stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), ";
- stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives);
- return stream.str();
- }
-
- public:
- double nodeSAH;
- size_t numNodes;
- size_t numChildren;
- };
-
- struct LeafStat
- {
- static const int NHIST = 8;
-
- LeafStat ( double leafSAH = 0.0f,
- size_t numLeaves = 0,
- size_t numPrimsActive = 0,
- size_t numPrimsTotal = 0,
- size_t numPrimBlocks = 0,
- size_t numBytes = 0)
- : leafSAH(leafSAH),
- numLeaves(numLeaves),
- numPrimsActive(numPrimsActive),
- numPrimsTotal(numPrimsTotal),
- numPrimBlocks(numPrimBlocks),
- numBytes(numBytes)
- {
- for (size_t i=0; i<NHIST; i++)
- numPrimBlocksHistogram[i] = 0;
- }
-
- double sah(BVH* bvh) const {
- return leafSAH/bvh->getLinearBounds().expectedHalfArea();
- }
-
- size_t bytes(BVH* bvh) const {
- return numBytes;
- }
-
- size_t size() const {
- return numLeaves;
- }
-
- double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
- double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
- double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }
-
- __forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
- {
- LeafStat stat(a.leafSAH + b.leafSAH,
- a.numLeaves+b.numLeaves,
- a.numPrimsActive+b.numPrimsActive,
- a.numPrimsTotal+b.numPrimsTotal,
- a.numPrimBlocks+b.numPrimBlocks,
- a.numBytes+b.numBytes);
- for (size_t i=0; i<NHIST; i++) {
- stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
- stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
- }
- return stat;
- }
-
- std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
- stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
- stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
- stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
- return stream.str();
- }
-
- std::string histToString() const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- for (size_t i=0; i<NHIST; i++)
- stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
- return stream.str();
- }
-
- public:
- double leafSAH; //!< SAH of the leaves only
- size_t numLeaves; //!< Number of leaf nodes.
- size_t numPrimsActive; //!< Number of active primitives (
- size_t numPrimsTotal; //!< Number of active and inactive primitives
- size_t numPrimBlocks; //!< Number of primitive blocks.
- size_t numBytes; //!< Number of bytes of leaves.
- size_t numPrimBlocksHistogram[8];
- };
-
- public:
- Statistics (size_t depth = 0,
- LeafStat statLeaf = LeafStat(),
- NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
- NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
- NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
- NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
- NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
- NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
-
- : depth(depth),
- statLeaf(statLeaf),
- statAABBNodes(statAABBNodes),
- statOBBNodes(statOBBNodes),
- statAABBNodesMB(statAABBNodesMB),
- statAABBNodesMB4D(statAABBNodesMB4D),
- statOBBNodesMB(statOBBNodesMB),
- statQuantizedNodes(statQuantizedNodes) {}
-
- double sah(BVH* bvh) const
- {
- return statLeaf.sah(bvh) +
- statAABBNodes.sah(bvh) +
- statOBBNodes.sah(bvh) +
- statAABBNodesMB.sah(bvh) +
- statAABBNodesMB4D.sah(bvh) +
- statOBBNodesMB.sah(bvh) +
- statQuantizedNodes.sah(bvh);
- }
-
- size_t bytes(BVH* bvh) const {
- return statLeaf.bytes(bvh) +
- statAABBNodes.bytes() +
- statOBBNodes.bytes() +
- statAABBNodesMB.bytes() +
- statAABBNodesMB4D.bytes() +
- statOBBNodesMB.bytes() +
- statQuantizedNodes.bytes();
- }
-
- size_t size() const
- {
- return statLeaf.size() +
- statAABBNodes.size() +
- statOBBNodes.size() +
- statAABBNodesMB.size() +
- statAABBNodesMB4D.size() +
- statOBBNodesMB.size() +
- statQuantizedNodes.size();
- }
-
- double fillRate (BVH* bvh) const
- {
- double nom = statLeaf.fillRateNom(bvh) +
- statAABBNodes.fillRateNom() +
- statOBBNodes.fillRateNom() +
- statAABBNodesMB.fillRateNom() +
- statAABBNodesMB4D.fillRateNom() +
- statOBBNodesMB.fillRateNom() +
- statQuantizedNodes.fillRateNom();
- double den = statLeaf.fillRateDen(bvh) +
- statAABBNodes.fillRateDen() +
- statOBBNodes.fillRateDen() +
- statAABBNodesMB.fillRateDen() +
- statAABBNodesMB4D.fillRateDen() +
- statOBBNodesMB.fillRateDen() +
- statQuantizedNodes.fillRateDen();
- return nom/den;
- }
-
- friend Statistics operator+ ( const Statistics& a, const Statistics& b )
- {
- return Statistics(max(a.depth,b.depth),
- a.statLeaf + b.statLeaf,
- a.statAABBNodes + b.statAABBNodes,
- a.statOBBNodes + b.statOBBNodes,
- a.statAABBNodesMB + b.statAABBNodesMB,
- a.statAABBNodesMB4D + b.statAABBNodesMB4D,
- a.statOBBNodesMB + b.statOBBNodesMB,
- a.statQuantizedNodes + b.statQuantizedNodes);
- }
-
- static Statistics add ( const Statistics& a, const Statistics& b ) {
- return a+b;
- }
-
- public:
- size_t depth;
- LeafStat statLeaf;
- NodeStat<AABBNode> statAABBNodes;
- NodeStat<OBBNode> statOBBNodes;
- NodeStat<AABBNodeMB> statAABBNodesMB;
- NodeStat<AABBNodeMB4D> statAABBNodesMB4D;
- NodeStat<OBBNodeMB> statOBBNodesMB;
- NodeStat<QuantizedNode> statQuantizedNodes;
- };
-
- public:
-
- /* Constructor gathers statistics. */
- BVHNStatistics (BVH* bvh);
-
- /*! Convert statistics into a string */
- std::string str();
-
- double sah() const {
- return stat.sah(bvh);
- }
-
- size_t bytesUsed() const {
- return stat.bytes(bvh);
- }
-
- private:
- Statistics statistics(NodeRef node, const double A, const BBox1f dt);
-
- private:
- BVH* bvh;
- Statistics stat;
- };
-
- typedef BVHNStatistics<4> BVH4Statistics;
- typedef BVHNStatistics<8> BVH8Statistics;
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h
deleted file mode 100644
index 7f17084b81..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h
+++ /dev/null
@@ -1,676 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "node_intersector1.h"
-#include "../common/stack_item.h"
-
-#define NEW_SORTING_CODE 1
-
-namespace embree
-{
- namespace isa
- {
- /*! BVH regular node traversal for single rays. */
- template<int N, int Nx, int types>
- class BVHNNodeTraverser1Hit;
-
- /*! Helper functions for fast sorting using AVX512 instructions. */
-#if defined(__AVX512ER__)
-
- /* KNL code path */
- __forceinline void isort_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p)
- {
- const vfloat16 dist_shift = align_shift_right<15>(dist,dist);
- const vllong8 ptr_shift = align_shift_right<7>(ptr,ptr);
- const vbool16 m_geq = d >= dist;
- const vbool16 m_geq_shift = m_geq << 1;
- dist = select(m_geq,d,dist);
- ptr = select(vboold8(m_geq),p,ptr);
- dist = select(m_geq_shift,dist_shift,dist);
- ptr = select(vboold8(m_geq_shift),ptr_shift,ptr);
- }
-
- __forceinline void isort_quick_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p)
- {
- //dist = align_shift_right<15>(dist,d);
- //ptr = align_shift_right<7>(ptr,p);
- dist = align_shift_right<15>(dist,permute(d,vint16(zero)));
- ptr = align_shift_right<7>(ptr,permute(p,vllong8(zero)));
- }
-
- template<int N, int Nx, int types, class NodeRef, class BaseNode>
- __forceinline void traverseClosestHitAVX512(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- vllong8 children( vllong<N>::loadu((void*)node->children) );
- children = vllong8::compact((int)mask,children);
- vfloat16 distance = tNear;
- distance = vfloat16::compact((int)mask,distance,tNear);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- mask &= mask-1;
- if (likely(mask == 0)) return;
-
- /* 2 hits: order A0 B0 */
- const vllong8 c0(children);
- const vfloat16 d0(distance);
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
- const vllong8 c1(children);
- const vfloat16 d1(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- /* a '<' keeps the order for equal distances, scenes like powerplant largely benefit from it */
- const vboolf16 m_dist = d0 < d1;
- const vfloat16 dist_A0 = select(m_dist, d0, d1);
- const vfloat16 dist_B0 = select(m_dist, d1, d0);
- const vllong8 ptr_A0 = select(vboold8(m_dist), c0, c1);
- const vllong8 ptr_B0 = select(vboold8(m_dist), c1, c0);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A0);
- stackPtr[0].ptr = toScalar(ptr_B0);
- *(float*)&stackPtr[0].dist = toScalar(dist_B0);
- stackPtr++;
- return;
- }
-
- /* 3 hits: order A1 B1 C1 */
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- const vllong8 c2(children);
- const vfloat16 d2(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vboolf16 m_dist1 = dist_A0 <= d2;
- const vfloat16 dist_tmp_B1 = select(m_dist1, d2, dist_A0);
- const vllong8 ptr_A1 = select(vboold8(m_dist1), ptr_A0, c2);
- const vllong8 ptr_tmp_B1 = select(vboold8(m_dist1), c2, ptr_A0);
-
- const vboolf16 m_dist2 = dist_B0 <= dist_tmp_B1;
- const vfloat16 dist_B1 = select(m_dist2, dist_B0 , dist_tmp_B1);
- const vfloat16 dist_C1 = select(m_dist2, dist_tmp_B1, dist_B0);
- const vllong8 ptr_B1 = select(vboold8(m_dist2), ptr_B0, ptr_tmp_B1);
- const vllong8 ptr_C1 = select(vboold8(m_dist2), ptr_tmp_B1, ptr_B0);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A1);
- stackPtr[0].ptr = toScalar(ptr_C1);
- *(float*)&stackPtr[0].dist = toScalar(dist_C1);
- stackPtr[1].ptr = toScalar(ptr_B1);
- *(float*)&stackPtr[1].dist = toScalar(dist_B1);
- stackPtr+=2;
- return;
- }
-
- /* 4 hits: order A2 B2 C2 D2 */
-
- const vfloat16 dist_A1 = select(m_dist1, dist_A0, d2);
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- const vllong8 c3(children);
- const vfloat16 d3(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vboolf16 m_dist3 = dist_A1 <= d3;
- const vfloat16 dist_tmp_B2 = select(m_dist3, d3, dist_A1);
- const vllong8 ptr_A2 = select(vboold8(m_dist3), ptr_A1, c3);
- const vllong8 ptr_tmp_B2 = select(vboold8(m_dist3), c3, ptr_A1);
-
- const vboolf16 m_dist4 = dist_B1 <= dist_tmp_B2;
- const vfloat16 dist_B2 = select(m_dist4, dist_B1 , dist_tmp_B2);
- const vfloat16 dist_tmp_C2 = select(m_dist4, dist_tmp_B2, dist_B1);
- const vllong8 ptr_B2 = select(vboold8(m_dist4), ptr_B1, ptr_tmp_B2);
- const vllong8 ptr_tmp_C2 = select(vboold8(m_dist4), ptr_tmp_B2, ptr_B1);
-
- const vboolf16 m_dist5 = dist_C1 <= dist_tmp_C2;
- const vfloat16 dist_C2 = select(m_dist5, dist_C1 , dist_tmp_C2);
- const vfloat16 dist_D2 = select(m_dist5, dist_tmp_C2, dist_C1);
- const vllong8 ptr_C2 = select(vboold8(m_dist5), ptr_C1, ptr_tmp_C2);
- const vllong8 ptr_D2 = select(vboold8(m_dist5), ptr_tmp_C2, ptr_C1);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A2);
- stackPtr[0].ptr = toScalar(ptr_D2);
- *(float*)&stackPtr[0].dist = toScalar(dist_D2);
- stackPtr[1].ptr = toScalar(ptr_C2);
- *(float*)&stackPtr[1].dist = toScalar(dist_C2);
- stackPtr[2].ptr = toScalar(ptr_B2);
- *(float*)&stackPtr[2].dist = toScalar(dist_B2);
- stackPtr+=3;
- return;
- }
-
- /* >=5 hits: reverse to descending order for writing to stack */
-
- const size_t hits = 4 + popcnt(mask);
- const vfloat16 dist_A2 = select(m_dist3, dist_A1, d3);
- vfloat16 dist(neg_inf);
- vllong8 ptr(zero);
-
-
- isort_quick_update(dist,ptr,dist_A2,ptr_A2);
- isort_quick_update(dist,ptr,dist_B2,ptr_B2);
- isort_quick_update(dist,ptr,dist_C2,ptr_C2);
- isort_quick_update(dist,ptr,dist_D2,ptr_D2);
-
- do {
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vfloat16 new_dist(permute(distance,vint16(zero)));
- const vllong8 new_ptr(permute(children,vllong8(zero)));
-
- mask &= mask-1;
- isort_update(dist,ptr,new_dist,new_ptr);
-
- } while(mask);
-
- const vboold8 m_stack_ptr(0x55); // 10101010 (lsb -> msb)
- const vboolf16 m_stack_dist(0x4444); // 0010001000100010 (lsb -> msb)
-
- /* extract current noderef */
- cur = toScalar(permute(ptr,vllong8(hits-1)));
- /* rearrange pointers to beginning of 16 bytes block */
- vllong8 stackElementA0;
- stackElementA0 = vllong8::expand(m_stack_ptr,ptr,stackElementA0);
- /* put distances in between */
- vuint16 stackElementA1((__m512i)stackElementA0);
- stackElementA1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementA1);
- /* write out first 4 x 16 bytes block to stack */
- vuint16::storeu(stackPtr,stackElementA1);
- /* get upper half of dist and ptr */
- dist = align_shift_right<4>(dist,dist);
- ptr = align_shift_right<4>(ptr,ptr);
- /* assemble and write out second block */
- vllong8 stackElementB0;
- stackElementB0 = vllong8::expand(m_stack_ptr,ptr,stackElementB0);
- vuint16 stackElementB1((__m512i)stackElementB0);
- stackElementB1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementB1);
- vuint16::storeu(stackPtr + 4,stackElementB1);
- /* increase stack pointer */
- stackPtr += hits-1;
- }
-#endif
-
-#if defined(__AVX512VL__) // SKX
-
- template<int N>
- __forceinline void isort_update(vint<N> &dist, const vint<N> &d)
- {
- const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
- const vboolf<N> m_geq = d >= dist;
- const vboolf<N> m_geq_shift = m_geq << 1;
- dist = select(m_geq,d,dist);
- dist = select(m_geq_shift,dist_shift,dist);
- }
-
- template<int N>
- __forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
- dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
- }
-
- __forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
- return toScalar(permutex2var((__m256i)index,n0,n1));
- }
-
- __forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
- return toScalar(permute(n,index));
- }
-
-#endif
-
- /* Specialization for BVH4. */
- template<int Nx, int types>
- class BVHNNodeTraverser1Hit<4, Nx, types>
- {
- typedef BVH4 BVH;
- typedef BVH4::NodeRef NodeRef;
- typedef BVH4::BaseNode BaseNode;
-
-
- public:
- /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
-#if defined(__AVX512ER__)
- traverseClosestHitAVX512<4,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#else
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
- if (likely(mask == 0)) {
- assert(cur != BVH::emptyNode);
- return;
- }
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- const unsigned int d0 = ((unsigned int*)&tNear)[r];
- r = bscf(mask);
- NodeRef c1 = node->child(r);
- BVH::prefetch(c1,types);
- const unsigned int d1 = ((unsigned int*)&tNear)[r];
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- assert(stackPtr < stackEnd);
- if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
- else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
- }
-
-#if NEW_SORTING_CODE == 1
- vint4 s0((size_t)c0,(size_t)d0);
- vint4 s1((size_t)c1,(size_t)d1);
- r = bscf(mask);
- NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
- vint4 s2((size_t)c2,(size_t)d2);
- /* 3 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort3(s0,s1,s2);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
- cur = toSizeT(s2);
- stackPtr+=2;
- return;
- }
- r = bscf(mask);
- NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
- vint4 s3((size_t)c3,(size_t)d3);
- /* 4 hits */
- StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
- cur = toSizeT(s3);
- stackPtr+=3;
-#else
- /*! Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there. */
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
-
- /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
-
- /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#endif
-#endif
- }
-
- /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- NodeRef*& stackPtr,
- NodeRef* stackEnd)
- {
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
-
- /* simpler in sequence traversal order */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r); BVH::prefetch(cur,types);
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
- }
- }
- };
-
- /* Specialization for BVH8. */
- template<int Nx, int types>
- class BVHNNodeTraverser1Hit<8, Nx, types>
- {
- typedef BVH8 BVH;
- typedef BVH8::NodeRef NodeRef;
- typedef BVH8::BaseNode BaseNode;
-
-#if defined(__AVX512VL__)
- template<class NodeRef, class BaseNode>
- static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
- size_t mask,
- const vfloat8& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
- const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
- const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
- vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
- distance_i = vint8::compact((int)mask,distance_i,distance_i);
- cur = permuteExtract(distance_i,n0,n1);
- BVH::prefetch(cur,types);
-
- mask &= mask-1;
- if (likely(mask == 0)) return;
-
- /* 2 hits: order A0 B0 */
- const vint8 d0(distance_i);
- const vint8 d1(shuffle<1>(distance_i));
- cur = permuteExtract(d1,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A0 = min(d0, d1);
- const vint8 dist_B0 = max(d0, d1);
- assert(dist_A0[0] < dist_B0[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A0,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
- stackPtr++;
- return;
- }
-
- /* 3 hits: order A1 B1 C1 */
-
- const vint8 d2(shuffle<2>(distance_i));
- cur = permuteExtract(d2,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A1 = min(dist_A0,d2);
- const vint8 dist_tmp_B1 = max(dist_A0,d2);
- const vint8 dist_B1 = min(dist_B0,dist_tmp_B1);
- const vint8 dist_C1 = max(dist_B0,dist_tmp_B1);
- assert(dist_A1[0] < dist_B1[0]);
- assert(dist_B1[0] < dist_C1[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A1,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
- stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
- *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
- stackPtr+=2;
- return;
- }
-
- /* 4 hits: order A2 B2 C2 D2 */
-
- const vint8 d3(shuffle<3>(distance_i));
- cur = permuteExtract(d3,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A2 = min(dist_A1,d3);
- const vint8 dist_tmp_B2 = max(dist_A1,d3);
- const vint8 dist_B2 = min(dist_B1,dist_tmp_B2);
- const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
- const vint8 dist_C2 = min(dist_C1,dist_tmp_C2);
- const vint8 dist_D2 = max(dist_C1,dist_tmp_C2);
- assert(dist_A2[0] < dist_B2[0]);
- assert(dist_B2[0] < dist_C2[0]);
- assert(dist_C2[0] < dist_D2[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A2,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
- stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
- *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
- stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
- *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
- stackPtr+=3;
- return;
- }
-
- /* >=5 hits: reverse to descending order for writing to stack */
-
- distance_i = align_shift_right<3>(distance_i,distance_i);
- const size_t hits = 4 + popcnt(mask);
- vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert
-
- isort_quick_update(dist,dist_A2);
- isort_quick_update(dist,dist_B2);
- isort_quick_update(dist,dist_C2);
- isort_quick_update(dist,dist_D2);
-
- do {
-
- distance_i = align_shift_right<1>(distance_i,distance_i);
- cur = permuteExtract(distance_i,n0,n1);
- BVH::prefetch(cur,types);
- const vint8 new_dist(permute(distance_i,vint8(zero)));
- mask &= mask-1;
- isort_update(dist,new_dist);
-
- } while(mask);
-
- for (size_t i=0; i<7; i++)
- assert(dist[i+0]>=dist[i+1]);
-
- for (size_t i=0;i<hits-1;i++)
- {
- stackPtr->ptr = permuteExtract(dist,n0,n1);
- *(float*)&stackPtr->dist = permuteExtract(dist,tNear);
- dist = align_shift_right<1>(dist,dist);
- stackPtr++;
- }
- cur = permuteExtract(dist,n0,n1);
- }
-#endif
-
- public:
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
-#if defined(__AVX512ER__)
- traverseClosestHitAVX512<8,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#elif defined(__AVX512VL__)
- traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#else
-
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
- if (likely(mask == 0)) {
- assert(cur != BVH::emptyNode);
- return;
- }
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- const unsigned int d0 = ((unsigned int*)&tNear)[r];
- r = bscf(mask);
- NodeRef c1 = node->child(r);
- BVH::prefetch(c1,types);
- const unsigned int d1 = ((unsigned int*)&tNear)[r];
-
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- assert(stackPtr < stackEnd);
- if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
- else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
- }
-#if NEW_SORTING_CODE == 1
- vint4 s0((size_t)c0,(size_t)d0);
- vint4 s1((size_t)c1,(size_t)d1);
-
- r = bscf(mask);
- NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
- vint4 s2((size_t)c2,(size_t)d2);
- /* 3 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort3(s0,s1,s2);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
- cur = toSizeT(s2);
- stackPtr+=2;
- return;
- }
- r = bscf(mask);
- NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
- vint4 s3((size_t)c3,(size_t)d3);
- /* 4 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
- cur = toSizeT(s3);
- stackPtr+=3;
- return;
- }
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
- /*! fallback case if more than 4 children are hit */
- StackItemT<NodeRef>* stackFirst = stackPtr;
- stackPtr+=4;
- while (1)
- {
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
- const vint4 s((size_t)c,(size_t)d);
- *(vint4*)stackPtr++ = s;
- assert(c != BVH::emptyNode);
- if (unlikely(mask == 0)) break;
- }
- sort(stackFirst,stackPtr);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#else
- /*! Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there. */
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
-
- /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
-
- /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
- /*! fallback case if more than 4 children are hit */
- StackItemT<NodeRef>* stackFirst = stackPtr-4;
- while (1)
- {
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (unlikely(mask == 0)) break;
- }
- sort(stackFirst,stackPtr);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#endif
-#endif
- }
-
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- NodeRef*& stackPtr,
- NodeRef* stackEnd)
- {
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
-
- /* simpler in sequence traversal order */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r); BVH::prefetch(cur,types);
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h
deleted file mode 100644
index 9c603babf0..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/stack_item.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, int Nx, int types>
- class BVHNNodeTraverserStreamHitCoherent
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::BaseNode BaseNode;
-
- public:
- template<class T>
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t& m_trav_active,
- const vbool<Nx>& vmask,
- const vfloat<Nx>& tNear,
- const T* const tMask,
- StackItemMaskCoherent*& stackPtr)
- {
- const NodeRef parent = cur;
- size_t mask = movemask(vmask);
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- const size_t r0 = bscf(mask);
- assert(r0 < 8);
- cur = node->child(r0);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r0];
- assert(cur != BVH::emptyNode);
- if (unlikely(mask == 0)) return;
-
- const unsigned int* const tNear_i = (unsigned int*)&tNear;
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- unsigned int d0 = tNear_i[r0];
- const size_t r1 = bscf(mask);
- assert(r1 < 8);
- NodeRef c1 = node->child(r1);
- BVHN<N>::prefetch(c1,types);
- unsigned int d1 = tNear_i[r1];
-
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- if (d0 < d1) {
- assert(tNear[r1] >= 0.0f);
- stackPtr->mask = tMask[r1];
- stackPtr->parent = parent;
- stackPtr->child = c1;
- stackPtr++;
- cur = c0;
- m_trav_active = tMask[r0];
- return;
- }
- else {
- assert(tNear[r0] >= 0.0f);
- stackPtr->mask = tMask[r0];
- stackPtr->parent = parent;
- stackPtr->child = c0;
- stackPtr++;
- cur = c1;
- m_trav_active = tMask[r1];
- return;
- }
- }
-
- /*! slow path for more than two hits */
- size_t hits = movemask(vmask);
- const vint<Nx> dist_i = select(vmask, (asInt(tNear) & 0xfffffff8) | vint<Nx>(step), 0);
- #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
- const vint<N> tmp = extractN<N,0>(dist_i);
- const vint<Nx> dist_i_sorted = usort_descending(tmp);
- #else
- const vint<Nx> dist_i_sorted = usort_descending(dist_i);
- #endif
- const vint<Nx> sorted_index = dist_i_sorted & 7;
-
- size_t i = 0;
- for (;;)
- {
- const unsigned int index = sorted_index[i];
- assert(index < 8);
- cur = node->child(index);
- m_trav_active = tMask[index];
- assert(m_trav_active);
- BVHN<N>::prefetch(cur,types);
- bscf(hits);
- if (unlikely(hits==0)) break;
- i++;
- assert(cur != BVH::emptyNode);
- assert(tNear[index] >= 0.0f);
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
- }
- }
-
- template<class T>
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t& m_trav_active,
- const vbool<Nx>& vmask,
- const T* const tMask,
- StackItemMaskCoherent*& stackPtr)
- {
- const NodeRef parent = cur;
- size_t mask = movemask(vmask);
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r];
-
- /* simple in order sequence */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r];
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h
deleted file mode 100644
index a978c0c459..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- struct NearFarPrecalculations
- {
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
-
- __forceinline NearFarPrecalculations() {}
-
- __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
- {
- const size_t size = sizeof(float)*N;
- nearX = (dir.x < 0.0f) ? 1*size : 0*size;
- nearY = (dir.y < 0.0f) ? 3*size : 2*size;
- nearZ = (dir.z < 0.0f) ? 5*size : 4*size;
- farX = nearX ^ size;
- farY = nearY ^ size;
- farZ = nearZ ^ size;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h
deleted file mode 100644
index aa0d4ba4d7..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h
+++ /dev/null
@@ -1,1788 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-#if defined(__AVX2__)
-#define __FMA_X4__
-#endif
-
-#if defined(__aarch64__)
-#define __FMA_X4__
-#endif
-
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray structure used in single-ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust>
- struct TravRayBase;
-
- /* Base (without tnear and tfar) */
- template<int N, int Nx>
- struct TravRayBase<N,Nx,false>
- {
- __forceinline TravRayBase() {}
-
- __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
- : org_xyz(ray_org), dir_xyz(ray_dir)
- {
- const Vec3fa ray_rdir = rcp_safe(ray_dir);
- org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
- dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
- rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
-#if defined(__FMA_X4__)
- const Vec3fa ray_org_rdir = ray_org*ray_rdir;
-#if !defined(__aarch64__)
- org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
-#else
- //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd
- //x86 will use msub
- neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z);
-#endif
-#endif
- nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
- nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
- nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
- farX = nearX ^ sizeof(vfloat<N>);
- farY = nearY ^ sizeof(vfloat<N>);
- farZ = nearZ ^ sizeof(vfloat<N>);
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
-
- }
-
- template<int K>
- __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- size_t flip = sizeof(vfloat<N>))
- {
- org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
- dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
- rdir = Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
-#if defined(__FMA_X4__)
-#if !defined(__aarch64__)
- org_rdir = org*rdir;
-#else
- neg_org_rdir = -(org*rdir);
-#endif
-#endif
- nearX = nearXYZ.x[k];
- nearY = nearXYZ.y[k];
- nearZ = nearXYZ.z[k];
- farX = nearX ^ flip;
- farY = nearY ^ flip;
- farZ = nearZ ^ flip;
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- Vec3fa org_xyz, dir_xyz;
- Vec3vf<Nx> org, dir, rdir;
-#if defined(__FMA_X4__)
-#if !defined(__aarch64__)
- Vec3vf<Nx> org_rdir;
-#else
- //aarch64 version are keeping negation of the org_rdir and use madd
- //x86 uses msub
- Vec3vf<Nx> neg_org_rdir;
-#endif
-#endif
-#if defined(__AVX512ER__) // KNL+
- vint16 permX, permY, permZ;
-#endif
-
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
- };
-
- /* Base (without tnear and tfar) */
- template<int N, int Nx>
- struct TravRayBase<N,Nx,true>
- {
- __forceinline TravRayBase() {}
-
- __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
- : org_xyz(ray_org), dir_xyz(ray_dir)
- {
- const float round_down = 1.0f-3.0f*float(ulp);
- const float round_up = 1.0f+3.0f*float(ulp);
- const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir);
- const Vec3fa ray_rdir_near = round_down*ray_rdir;
- const Vec3fa ray_rdir_far = round_up *ray_rdir;
- org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
- dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
- rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
- rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
- nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
- nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
- nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
- farX = nearX ^ sizeof(vfloat<N>);
- farY = nearY ^ sizeof(vfloat<N>);
- farZ = nearZ ^ sizeof(vfloat<N>);
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- template<int K>
- __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- size_t flip = sizeof(vfloat<N>))
- {
- const vfloat<Nx> round_down = 1.0f-3.0f*float(ulp);
- const vfloat<Nx> round_up = 1.0f+3.0f*float(ulp);
- org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
- dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
- rdir_near = round_down*Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
- rdir_far = round_up *Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
-
- nearX = nearXYZ.x[k];
- nearY = nearXYZ.y[k];
- nearZ = nearXYZ.z[k];
- farX = nearX ^ flip;
- farY = nearY ^ flip;
- farZ = nearZ ^ flip;
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- Vec3fa org_xyz, dir_xyz;
- Vec3vf<Nx> org, dir, rdir_near, rdir_far;
-#if defined(__AVX512ER__) // KNL+
- vint16 permX, permY, permZ;
-#endif
-
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
- };
-
- /* Full (with tnear and tfar) */
- template<int N, int Nx, bool robust>
- struct TravRay : TravRayBase<N,Nx,robust>
- {
- __forceinline TravRay() {}
-
- __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
- : TravRayBase<N,Nx,robust>(ray_org, ray_dir),
- tnear(ray_tnear), tfar(ray_tfar) {}
-
- template<int K>
- __forceinline TravRay(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- float ray_tnear, float ray_tfar,
- size_t flip = sizeof(vfloat<N>))
- : TravRayBase<N,Nx,robust>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip),
- tnear(ray_tnear), tfar(ray_tfar) {}
-
- vfloat<Nx> tnear;
- vfloat<Nx> tfar;
- };
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Point Query structure used in single-ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N>
- struct TravPointQuery
- {
- __forceinline TravPointQuery() {}
-
- __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
- {
- org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
- rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
- }
-
- __forceinline vfloat<N> const& tfar() const {
- return rad.x;
- }
-
- Vec3vf<N> org, rad;
- };
-
- //////////////////////////////////////////////////////////////////////////////////////
- // point query
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N>
- __forceinline size_t pointQuerySphereDistAndMask(
- const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
- vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
- {
- const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
- const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
- const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
- dist = vX * vX + vY * vY + vZ * vZ;
- const vbool<N> vmask = dist <= query.tfar()*query.tfar();
- const vbool<N> valid = minX <= maxX;
- return movemask(vmask) & movemask(valid);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
- const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
- const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
- const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
- const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
- const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
- const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
- const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
- const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
- const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
- const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
- const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
- const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
- const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
- const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
- const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
- const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
- size_t mask = pointQueryNodeSphere(node, query, time, dist);
-
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
- mask &= movemask(vmask);
- }
-
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> start_x(node->start.x);
- const vfloat<N> scale_x(node->scale.x);
- const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> start_y(node->start.y);
- const vfloat<N> scale_y(node->scale.y);
- const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> start_z(node->start.z);
- const vfloat<N> scale_z(node->scale.z);
- const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const vfloat<N> minX = node->dequantizeLowerX(time);
- const vfloat<N> maxX = node->dequantizeUpperX(time);
- const vfloat<N> minY = node->dequantizeLowerY(time);
- const vfloat<N> maxY = node->dequantizeUpperY(time);
- const vfloat<N> minZ = node->dequantizeLowerZ(time);
- const vfloat<N> maxZ = node->dequantizeUpperZ(time);
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryAABBDistAndMask(
- const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
- vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
- {
- const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
- const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
- const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
- dist = vX * vX + vY * vY + vZ * vZ;
- const vbool<N> valid = minX <= maxX;
- const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
- (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
- (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
- return movemask(vmask) & movemask(valid);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
- const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
- const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
- const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
- const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
- const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
- return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
- const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
- const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
- const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
- const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
- const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
- const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
- const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
- const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
- const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
- const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
- const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
- return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
- size_t mask = pointQueryNodeAABB(node, query, time, dist);
-
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
- mask &= movemask(vmask);
- }
-
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat<N> start_x(node->start.x);
- const vfloat<N> scale_x(node->scale.x);
- const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> start_y(node->start.y);
- const vfloat<N> scale_y(node->scale.y);
- const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> start_z(node->start.z);
- const vfloat<N> scale_z(node->scale.z);
- const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat<N> minX = node->dequantizeLowerX(time);
- const vfloat<N> maxX = node->dequantizeUpperX(time);
- const vfloat<N> minY = node->dequantizeLowerY(time);
- const vfloat<N> maxY = node->dequantizeUpperY(time);
- const vfloat<N> minZ = node->dequantizeLowerZ(time);
- const vfloat<N> maxZ = node->dequantizeUpperZ(time);
- return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust>
- __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist);
-
- template<>
- __forceinline size_t intersectNode<4,4>(const typename BVH4::AABBNode* node, const TravRay<4,4,false>& ray, vfloat4& dist)
- {
-#if defined(__FMA_X4__)
-#if defined(__aarch64__)
- const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
- const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else
- const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
- const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
- const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
- const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
- const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
- const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__aarch64__)
- const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
- const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) > asInt(tFar);
- const size_t mask = movemask(vmask) ^ ((1<<4)-1);
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else
- const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask;
- }
-
-#if defined(__AVX__)
-
- template<>
- __forceinline size_t intersectNode<8,8>(const typename BVH8::AABBNode* node, const TravRay<8,8,false>& ray, vfloat8& dist)
- {
-#if defined(__AVX2__)
-#if defined(__aarch64__)
- const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
- const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
-#endif
-
-#else
- const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
- const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
- const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
- const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
- const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
- const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) > asInt(tFar);
- const size_t mask = movemask(vmask) ^ ((1<<8)-1);
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask;
- }
-
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- template<> // specialization for a BVH4 AABBNode tested with a 16-wide TravRay (non-robust variant)
- __forceinline size_t intersectNode<4,16>(const typename BVH4::AABBNode* node, const TravRay<4,16,false>& ray, vfloat16& dist)
- { // returns movemask of the lanes where tNear <= tFar; dist receives tNear
- const vfloat16 tNearX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); // ray.nearX is used as a byte offset from &node->lower_x; the vfloat4 read there is widened to vfloat16
- const vfloat16 tNearY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tFarX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); // same scheme with the far-plane offsets
- const vfloat16 tFarY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tFarZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); // entry distance: largest near value, clamped by ray.tnear
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); // exit distance: smallest far value, clamped by ray.tfar
- const vbool16 vmask = le(vbool16(0xf),tNear,tFar); // NOTE(review): mask 0xf presumably limits the compare to the low 4 lanes (one per child) - confirm le() semantics
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<> // specialization for a BVH8 AABBNode tested with a 16-wide TravRay (non-robust variant)
- __forceinline size_t intersectNode<8,16>(const typename BVH8::AABBNode* node, const TravRay<8,16,false>& ray, vfloat16& dist)
- { // returns movemask of the compare below; dist receives tNear
- const vllong8 invalid((size_t)BVH8::emptyNode); // emptyNode value replicated for comparison against the child references
- const vboold8 m_valid(invalid != vllong8::loadu(node->children)); // lanes whose child reference differs from emptyNode
- const vfloat16 bminmaxX = permute(vfloat16::load((const float*)&node->lower_x), ray.permX); // 16 floats starting at lower_x, reordered by ray.permX; presumably near planes end up in the low 8 lanes and far planes in the high 8 - TODO confirm
- const vfloat16 bminmaxY = permute(vfloat16::load((const float*)&node->lower_y), ray.permY);
- const vfloat16 bminmaxZ = permute(vfloat16::load((const float*)&node->lower_z), ray.permZ);
- const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); // near and far distances are computed together in one 16-wide vector
- const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear);
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(vboolf16(m_valid),tNear,align_shift_right<8>(tFar, tFar)); // NOTE(review): the 8-lane shift presumably brings the far results into the lanes of the near results before the masked compare - confirm
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx> // generic robust AABBNode test: N-wide node data, result distance stored into an Nx-wide vector
- __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist)
- { // returns movemask of the lanes where tNear <= tFar; dist receives tNear
- const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; // explicit (plane - org) * rdir; near planes use ray.rdir_near
- const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
- const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
- const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; // far planes use the separate ray.rdir_far (presumably rounded conservatively - confirm in TravRay)
- const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
- const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
- const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool<N> vmask = tNear <= tFar; // plain float compare on every ISA (no integer-compare branch in this variant)
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- template<> // robust test of a BVH4 AABBNode with a 16-wide TravRay
- __forceinline size_t intersectNodeRobust<4,16>(const typename BVHN<4>::AABBNode* node, const TravRay<4,16,true>& ray, vfloat<16>& dist)
- { // returns movemask of the masked compare below; dist receives tNear
- const vfloat16 tNearX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; // vfloat<4> read at byte offset ray.nearX, widened to vfloat16
- const vfloat16 tNearY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; // far planes are scaled by ray.rdir_far instead of ray.rdir_near
- const vfloat16 tFarY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le((1 << 4)-1,tNear,tFar); // mask 0xf: presumably only the low 4 lanes take part in the compare - confirm le() semantics
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<> // robust test of a BVH8 AABBNode with a 16-wide TravRay
- __forceinline size_t intersectNodeRobust<8,16>(const typename BVHN<8>::AABBNode* node, const TravRay<8,16,true>& ray, vfloat<16>& dist)
- { // returns movemask of the masked compare below; dist receives tNear
- const vfloat16 tNearX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; // vfloat<8> read at byte offset ray.nearX, widened to vfloat16
- const vfloat16 tNearY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; // far planes are scaled by ray.rdir_far instead of ray.rdir_near
- const vfloat16 tFarY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le((1 << 8)-1,tNear,tFar); // mask 0xff: presumably only the low 8 lanes take part in the compare - confirm le() semantics
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // fast test of a motion-blur AABBNodeMB at a given time
- __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist)
- { // returns movemask of the lanes whose [tNear,tFar] interval is non-empty; dist receives tNear
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); // ray.near*/far* are byte offsets from &node->lower_x selecting the plane to read
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
-#if defined(__FMA_X4__) // fused multiply-add paths
-#if defined(__aarch64__) // aarch64 adds the pre-negated ray.neg_org_rdir instead of subtracting ray.org_rdir
- const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); // inner madd combines p[0] with time and p[6]; presumably p[6] holds the per-time delta of the plane at p[0] - TODO confirm against AABBNodeMB layout
- const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (plane - org) * rdir
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
-#endif
-#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW
- const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool<N> vmask = asInt(tNear) > asInt(tFar); // compares the values reinterpreted as integers; true marks a miss
- const size_t mask = movemask(vmask) ^ ((1<<N)-1); // flip the low N bits so set bits mean hit
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool<N> vmask = asInt(tNear) <= asInt(tFar); // integer-reinterpreted compare, hit mask directly
- const size_t mask = movemask(vmask);
-#else // generic path: float max/min and float compare
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- const vbool<N> vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // robust test of a motion-blur AABBNodeMB at a given time
- __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist)
- { // returns movemask of the lanes where tNear <= tFar; dist receives tNear
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); // ray.near* are byte offsets from &node->lower_x
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; // madd combines p[0] with time and p[6] (presumably base plane plus per-time delta - TODO confirm); near side uses ray.rdir_near
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); // same scheme for the far planes
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
- const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; // far side uses ray.rdir_far
- const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
- const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
- const size_t mask = movemask(tNear <= tFar); // float compare on every ISA in this variant
- dist = tNear;
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // fast test of a node reference that is an AABBNodeMB or an AABBNodeMB4D
- __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist)
- { // returns movemask of hit lanes (additionally time-filtered for MB4D nodes); dist receives tNear
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); // the spatial bounds are read through the AABBNodeMB view for both node kinds
-
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); // ray.near*/far* are byte offsets from &node->lower_x
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
-#if defined (__FMA_X4__) // fused multiply-add paths
-#if defined(__aarch64__) // aarch64 adds the pre-negated ray.neg_org_rdir instead of subtracting ray.org_rdir
- const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); // inner madd combines p[0] with time and p[6] (presumably base plane plus per-time delta - TODO confirm)
- const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (plane - org) * rdir
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
-#endif
-#if defined(__FMA_X4__) && !defined(__AVX512F__) // pairwise maxi/mini reduction on this path
- const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
- const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
-#else
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
-#endif
- vbool<N> vmask = tNear <= tFar; // float compare on all paths here (unlike the AABBNodeMB fast test)
- if (unlikely(ref.isAABBNodeMB4D())) { // 4D nodes additionally carry a per-child time range
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- vmask &= (node1->lower_t <= time) & (time < node1->upper_t); // keep only children with lower_t <= time < upper_t (half-open range)
- }
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // robust test of a node reference that is an AABBNodeMB or an AABBNodeMB4D
- __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist)
- { // returns movemask of hit lanes (additionally time-filtered for MB4D nodes); dist receives tNear
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); // the spatial bounds are read through the AABBNodeMB view for both node kinds
-
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); // ray.near* are byte offsets from &node->lower_x
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; // madd combines p[0] with time and p[6] (presumably base plane plus per-time delta - TODO confirm); near side uses ray.rdir_near
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); // same scheme for the far planes
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
- const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; // far side uses ray.rdir_far
- const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
- const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
- vbool<N> vmask = tNear <= tFar;
- if (unlikely(ref.isAABBNodeMB4D())) { // 4D nodes additionally carry a per-child time range
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- vmask &= (node1->lower_t <= time) & (time < node1->upper_t); // keep only children with lower_t <= time < upper_t (half-open range)
- }
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast and robust QuantizedBaseNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust> // primary template for QuantizedBaseNode tests; 'robust' selects the TravRay flavour
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); // declaration only, no generic definition is given here
-
- template<> // fast test of a BVH4 QuantizedBaseNode with a 4-wide TravRay
- __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,false>& ray, vfloat4& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result at the end
- const vfloat4 start_x(node->start.x);
- const vfloat4 scale_x(node->scale.x);
- const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); // dequantized value * scale + start; the >> 2 presumably turns the byte offset into a float index - TODO confirm
- const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
- const vfloat4 start_y(node->start.y);
- const vfloat4 scale_y(node->scale.y);
- const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
- const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
- const vfloat4 start_z(node->start.z);
- const vfloat4 scale_z(node->scale.z);
- const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
-
-#if defined(__FMA_X4__) // fused multiply-add paths
-#if defined(__aarch64__) // aarch64 adds the pre-negated ray.neg_org_rdir instead of subtracting ray.org_rdir
- const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (plane - org) * rdir
- const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) > asInt(tFar); // compares the values reinterpreted as integers; true marks a miss
- const size_t mask = movemask(vmask) ^ ((1<<4)-1); // flip the low 4 bits so set bits mean hit
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic path: float max/min and float compare
- const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask & mvalid; // drop hits reported for children not in the valid mask
- }
-
- template<> // robust test of a BVH4 QuantizedBaseNode with a 4-wide TravRay
- __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,true>& ray, vfloat4& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result at the end
- const vfloat4 start_x(node->start.x);
- const vfloat4 scale_x(node->scale.x);
- const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); // dequantized value * scale + start; the >> 2 presumably turns the byte offset into a float index - TODO confirm
- const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
- const vfloat4 start_y(node->start.y);
- const vfloat4 scale_y(node->scale.y);
- const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
- const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
- const vfloat4 start_z(node->start.z);
- const vfloat4 scale_z(node->scale.z);
- const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
-
- const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; // robust variant: no FMA path, near side scaled by ray.rdir_near
- const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; // far side scaled by ray.rdir_far
- const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask & mvalid; // drop hits reported for children not in the valid mask
- }
-
-
-#if defined(__AVX__)
-
- template<> // fast test of a BVH8 QuantizedBaseNode with an 8-wide TravRay
- __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,false>& ray, vfloat8& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result at the end
- const vfloat8 start_x(node->start.x);
- const vfloat8 scale_x(node->scale.x);
- const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); // dequantized value * scale + start; the >> 2 presumably turns the byte offset into a float index - TODO confirm
- const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
- const vfloat8 start_y(node->start.y);
- const vfloat8 scale_y(node->scale.y);
- const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
- const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
- const vfloat8 start_z(node->start.z);
- const vfloat8 scale_z(node->scale.z);
- const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
-
-#if defined(__AVX2__) // fused multiply-add paths
-#if defined(__aarch64__) // aarch64 adds the pre-negated ray.neg_org_rdir instead of subtracting ray.org_rdir
- const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no AVX2: explicit (plane - org) * rdir
- const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) > asInt(tFar); // compares the values reinterpreted as integers; true marks a miss
- const size_t mask = movemask(vmask) ^ ((1<<8)-1); // flip the low 8 bits so set bits mean hit
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic path: float max/min and float compare
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask & mvalid; // drop hits reported for children not in the valid mask
- }
-
- template<> // robust test of a BVH8 QuantizedBaseNode with an 8-wide TravRay
- __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,true>& ray, vfloat8& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result at the end
- const vfloat8 start_x(node->start.x);
- const vfloat8 scale_x(node->scale.x);
- const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); // dequantized value * scale + start; the >> 2 presumably turns the byte offset into a float index - TODO confirm
- const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
- const vfloat8 start_y(node->start.y);
- const vfloat8 scale_y(node->scale.y);
- const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
- const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
- const vfloat8 start_z(node->start.z);
- const vfloat8 scale_z(node->scale.z);
- const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
-
- const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; // robust variant: no FMA path, near side scaled by ray.rdir_near
- const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; // far side scaled by ray.rdir_far
- const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-
- dist = tNear;
- return mask & mvalid; // drop hits reported for children not in the valid mask
- }
-
-
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- template<> // fast test of a BVH4 QuantizedBaseNode with a 16-wide TravRay
- __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,false>& ray, vfloat16& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result below
- const vfloat16 start_x(node->start.x);
- const vfloat16 scale_x(node->scale.x);
- const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); // 4-wide dequantized values widened to vfloat16, then * scale + start
- const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x);
- const vfloat16 start_y(node->start.y);
- const vfloat16 scale_y(node->scale.y);
- const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y);
- const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y);
- const vfloat16 start_z(node->start.z);
- const vfloat16 scale_z(node->scale.z);
- const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z);
- const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z);
-
- const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); // msub form with precomputed ray.org_rdir (no non-FMA fallback in this specialization)
- const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le(vbool16(0xf),tNear,tFar); // mask 0xf: presumably only the low 4 lanes take part in the compare - confirm le() semantics
- const size_t mask = movemask(vmask) & mvalid; // drop hits reported for children not in the valid mask
- dist = tNear;
- return mask;
- }
-
- template<> // robust test of a BVH4 QuantizedBaseNode with a 16-wide TravRay
- __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,true>& ray, vfloat16& dist)
- { // returns the hit mask restricted to the node's valid children; dist receives tNear
- const size_t mvalid = movemask(node->validMask()); // bitmask taken from the node's validMask(), ANDed into the result below
- const vfloat16 start_x(node->start.x);
- const vfloat16 scale_x(node->scale.x);
- const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); // 4-wide dequantized values widened to vfloat16, then * scale + start
- const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x);
- const vfloat16 start_y(node->start.y);
- const vfloat16 scale_y(node->scale.y);
- const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y);
- const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y);
- const vfloat16 start_z(node->start.z);
- const vfloat16 scale_z(node->scale.z);
- const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z);
- const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z);
-
- const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; // robust variant: explicit subtract/multiply, near side scaled by ray.rdir_near
- const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; // far side scaled by ray.rdir_far
- const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le(vbool16(0xf),tNear,tFar); // mask 0xf: presumably only the low 4 lanes take part in the compare - confirm le() semantics
- const size_t mask = movemask(vmask) & mvalid; // drop hits reported for children not in the valid mask
- dist = tNear;
- return mask;
- }
-
- template<> // fast test of a BVH8 QuantizedBaseNode with a 16-wide TravRay
- __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,false>& ray, vfloat16& dist)
- { // returns movemask of the masked compare below; dist receives tNear
- const vbool16 m_valid(node->validMask16()); // 16-lane validity mask, used as the write mask of the final compare
- const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); // per the helper name, lower and upper X bounds dequantized together, ordered by ray.permX - TODO confirm lane layout
- const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY);
- const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ);
- const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); // near and far distances are computed together in one 16-wide vector
- const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear);
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); // NOTE(review): the 8-lane shift presumably brings the far results into the lanes of the near results before the masked compare - confirm
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<> // robust-flavoured test of a BVH8 QuantizedBaseNode with a 16-wide TravRay (see FIXME below)
- __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,true>& ray, vfloat16& dist)
- { // returns movemask of the masked compare below; dist receives tNear
- const vbool16 m_valid(node->validMask16()); // 16-lane validity mask, used as the write mask of the final compare
- const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); // per the helper name, lower and upper X bounds dequantized together, ordered by ray.permX - TODO confirm lane layout
- const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY);
- const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ);
- const vfloat16 tNearFarX = (bminmaxX - ray.org.x) * ray.rdir_far.x; // FIXME: this is not conservative !!!!!!!!! (ray.rdir_far scales every lane here; ray.rdir_near is never used, unlike the other robust variants)
- const vfloat16 tNearFarY = (bminmaxY - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tNearFarZ = (bminmaxZ - ray.org.z) * ray.rdir_far.z;
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear);
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); // NOTE(review): the 8-lane shift presumably brings the far results into the lanes of the near results before the masked compare - confirm
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-
-#endif
-
-
- template<int N, int Nx> // fast test of a motion-blur QuantizedBaseNodeMB at a given time; dist is N-wide even though the ray is TravRay<N,Nx,...>
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist)
- { // returns movemask of valid lanes whose [tNear,tFar] interval is non-empty; dist receives tNear
- const vboolf<N> mvalid = node->validMask();
- const vfloat<N> lower_x = node->dequantizeLowerX(time); // bounds come from the node's time-parameterised dequantize helpers
- const vfloat<N> upper_x = node->dequantizeUpperX(time);
- const vfloat<N> lower_y = node->dequantizeLowerY(time);
- const vfloat<N> upper_y = node->dequantizeUpperY(time);
- const vfloat<N> lower_z = node->dequantizeLowerZ(time);
- const vfloat<N> upper_z = node->dequantizeUpperZ(time);
-#if defined(__FMA_X4__) // fused multiply-add paths
-#if defined(__aarch64__) // aarch64 adds the pre-negated ray.neg_org_rdir instead of subtracting ray.org_rdir
- const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); // "near"/"far" here just mean lower/upper plane; they are ordered per axis further down
- const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (plane - org) * rdir
- const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
- const vfloat<N> tminX = mini(tNearX,tFarX); // order the two plane distances per axis instead of selecting planes via ray.nearX/farX offsets
- const vfloat<N> tmaxX = maxi(tNearX,tFarX);
- const vfloat<N> tminY = mini(tNearY,tFarY);
- const vfloat<N> tmaxY = maxi(tNearY,tFarY);
- const vfloat<N> tminZ = mini(tNearZ,tFarZ);
- const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
- const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
- const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); // integer-reinterpreted compare with mvalid passed as mask
-#else
- const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; // integer-reinterpreted compare, then restricted to valid children
-#endif
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<int N, int Nx>
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist)
- {
- const vboolf<N> mvalid = node->validMask();
- const vfloat<N> lower_x = node->dequantizeLowerX(time);
- const vfloat<N> upper_x = node->dequantizeUpperX(time);
- const vfloat<N> lower_y = node->dequantizeLowerY(time);
- const vfloat<N> upper_y = node->dequantizeUpperY(time);
- const vfloat<N> lower_z = node->dequantizeLowerZ(time);
- const vfloat<N> upper_z = node->dequantizeUpperZ(time);
- const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat<N> tminX = mini(tNearX,tFarX);
- const vfloat<N> tmaxX = maxi(tNearX,tFarX);
- const vfloat<N> tminY = mini(tNearY,tFarY);
- const vfloat<N> tmaxY = maxi(tNearY,tFarY);
- const vfloat<N> tminZ = mini(tNearZ,tFarZ);
- const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
- const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
- const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
-#else
- const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
-#endif
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-
-#if defined(__AVX512ER__)
- // for KNL
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,false>& ray, const float time, vfloat<4>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 lower_x = node->dequantizeLowerX(time);
- const vfloat16 upper_x = node->dequantizeUpperX(time);
- const vfloat16 lower_y = node->dequantizeLowerY(time);
- const vfloat16 upper_y = node->dequantizeUpperY(time);
- const vfloat16 lower_z = node->dequantizeLowerZ(time);
- const vfloat16 upper_z = node->dequantizeUpperZ(time);
-
- const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-
- const vfloat16 tminX = min(tNearX,tFarX);
- const vfloat16 tmaxX = max(tNearX,tFarX);
- const vfloat16 tminY = min(tNearY,tFarY);
- const vfloat16 tmaxY = max(tNearY,tFarY);
- const vfloat16 tminZ = min(tNearZ,tFarZ);
- const vfloat16 tmaxZ = max(tNearZ,tFarZ);
- const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear);
- const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar );
- const vbool16 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask) & mvalid;
- dist = extractN<4,0>(tNear);
- return mask;
- }
-
-
- // for KNL
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,true>& ray, const float time, vfloat<4>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 lower_x = node->dequantizeLowerX(time);
- const vfloat16 upper_x = node->dequantizeUpperX(time);
- const vfloat16 lower_y = node->dequantizeLowerY(time);
- const vfloat16 upper_y = node->dequantizeUpperY(time);
- const vfloat16 lower_z = node->dequantizeLowerZ(time);
- const vfloat16 upper_z = node->dequantizeUpperZ(time);
-
- const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat16 tminX = min(tNearX,tFarX);
- const vfloat16 tmaxX = max(tNearX,tFarX);
- const vfloat16 tminY = min(tNearY,tFarY);
- const vfloat16 tmaxY = max(tNearY,tFarY);
- const vfloat16 tminZ = min(tNearZ,tFarZ);
- const vfloat16 tmaxZ = max(tNearZ,tFarZ);
- const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear);
- const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar );
- const vbool16 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask) & mvalid;
- dist = extractN<4,0>(tNear);
- return mask;
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, bool robust>
- __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,N,robust>& ray, vfloat<N>& dist)
- {
- const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
- //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
- const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
- const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
- const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
- const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
-
- const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
- const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
- vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
- vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- if (robust) {
- tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
- tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
- }
- const vbool<N> vmask = tNear <= tFar;
- dist = tNear;
- return movemask(vmask);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, bool robust>
- __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,N,robust>& ray, const float time, vfloat<N>& dist)
- {
- const AffineSpace3vf<N> xfm = node->space0;
- const Vec3vf<N> b0_lower = zero;
- const Vec3vf<N> b0_upper = one;
- const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
- const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
-
- const BBox3vf<N> bounds(lower,upper);
- const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
- const Vec3vf<N> rdir = rcp_safe(dir);
- const Vec3vf<N> org = xfmPoint(xfm,ray.org);
-
- const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
- const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
-
- const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
- const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
- vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
- vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- if (robust) {
- tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
- tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
- }
- const vbool<N> vmask = tNear <= tFar;
- dist = tNear;
- return movemask(vmask);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in point query raversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Computes traversal information for N nodes with 1 point query */
- template<int N, int types>
- struct BVHNNodePointQuerySphere1;
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
- else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
- else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_QN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNQuantizedBaseNodePointQuerySphere1
- {
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- return pointQueryNodeSphere(node,query,dist);
- }
-
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- return pointQueryNodeSphere(node,query,time,dist);
- }
- };
-
- /*! Computes traversal information for N nodes with 1 point query */
- template<int N, int types>
- struct BVHNNodePointQueryAABB1;
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
- else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
- else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_QN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNQuantizedBaseNodePointQueryAABB1
- {
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- return pointQueryNodeAABB(node,query,dist);
- }
-
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- return pointQueryNodeAABB(node,query,time,dist);
- }
- };
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Intersects N nodes with 1 ray */
- template<int N, int Nx, int types, bool robust>
- struct BVHNNodeIntersector1;
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode(node.getAABBNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeMB4D<N>(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
- else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
- else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else mask = intersectNodeMB4D(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else mask = intersectNodeMB4DRobust(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_QN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_QN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
- return true;
- }
- };
-
- /*! Intersects N nodes with K rays */
- template<int N, int Nx, bool robust>
- struct BVHNQuantizedBaseNodeIntersector1;
-
- template<int N, int Nx>
- struct BVHNQuantizedBaseNodeIntersector1<N, Nx, false>
- {
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,false>& ray, vfloat<Nx>& dist)
- {
- return intersectNode(node,ray,dist);
- }
-
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist)
- {
- return intersectNode(node,ray,time,dist);
- }
-
- };
-
- template<int N, int Nx>
- struct BVHNQuantizedBaseNodeIntersector1<N, Nx, true>
- {
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist)
- {
- return intersectNode(node,ray,dist);
- }
-
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist)
- {
- return intersectNode(node,ray,time,dist);
- }
-
- };
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h
deleted file mode 100644
index 800ac8b478..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Frustum structure used in hybrid and stream traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*
- Optimized frustum test. We calculate t=(p-org)/dir in ray/box
- intersection. We assume the rays are split by octant, thus
- dir intervals are either positive or negative in each
- dimension.
-
- Case 1: dir.min >= 0 && dir.max >= 0:
- t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
- t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
-
- Case 2: dir.min < 0 && dir.max < 0:
- t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
- t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
- */
-
- template<bool robust>
- struct Frustum;
-
- /* Fast variant */
- template<>
- struct Frustum<false>
- {
- __forceinline Frustum() {}
-
- template<int K>
- __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(valid, org, rdir, ray_tnear, ray_tfar, N);
- }
-
- template<int K>
- __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
- reduce_min(select(valid, org.y, pos_inf)),
- reduce_min(select(valid, org.z, pos_inf)));
-
- const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
- reduce_max(select(valid, org.y, neg_inf)),
- reduce_max(select(valid, org.z, neg_inf)));
-
- const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
- reduce_min(select(valid, rdir.y, pos_inf)),
- reduce_min(select(valid, rdir.z, pos_inf)));
-
- const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
- reduce_max(select(valid, rdir.y, neg_inf)),
- reduce_max(select(valid, rdir.z, neg_inf)));
-
- const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
- const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
-
- init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
- }
-
- __forceinline void init(const Vec3fa& reduced_min_org,
- const Vec3fa& reduced_max_org,
- const Vec3fa& reduced_min_rdir,
- const Vec3fa& reduced_max_rdir,
- float reduced_min_dist,
- float reduced_max_dist,
- int N)
- {
- const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
-
- min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
- max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
-
-#if defined (__aarch64__)
- neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
- neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
-#else
- min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
- max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
-#endif
- min_dist = reduced_min_dist;
- max_dist = reduced_max_dist;
-
- nf = NearFarPrecalculations(min_rdir, N);
- }
-
- template<int K>
- __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
- {
- max_dist = reduce_max(ray_tfar);
- }
-
- NearFarPrecalculations nf;
-
- Vec3fa min_rdir;
- Vec3fa max_rdir;
-
-#if defined (__aarch64__)
- Vec3fa neg_min_org_rdir;
- Vec3fa neg_max_org_rdir;
-#else
- Vec3fa min_org_rdir;
- Vec3fa max_org_rdir;
-#endif
- float min_dist;
- float max_dist;
- };
-
- typedef Frustum<false> FrustumFast;
-
- /* Robust variant */
- template<>
- struct Frustum<true>
- {
- __forceinline Frustum() {}
-
- template<int K>
- __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(valid, org, rdir, ray_tnear, ray_tfar, N);
- }
-
- template<int K>
- __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
- reduce_min(select(valid, org.y, pos_inf)),
- reduce_min(select(valid, org.z, pos_inf)));
-
- const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
- reduce_max(select(valid, org.y, neg_inf)),
- reduce_max(select(valid, org.z, neg_inf)));
-
- const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
- reduce_min(select(valid, rdir.y, pos_inf)),
- reduce_min(select(valid, rdir.z, pos_inf)));
-
- const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
- reduce_max(select(valid, rdir.y, neg_inf)),
- reduce_max(select(valid, rdir.z, neg_inf)));
-
- const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
- const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
-
- init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
- }
-
- __forceinline void init(const Vec3fa& reduced_min_org,
- const Vec3fa& reduced_max_org,
- const Vec3fa& reduced_min_rdir,
- const Vec3fa& reduced_max_rdir,
- float reduced_min_dist,
- float reduced_max_dist,
- int N)
- {
- const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
- min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
- max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
-
- min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
- max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
-
- min_dist = reduced_min_dist;
- max_dist = reduced_max_dist;
-
- nf = NearFarPrecalculations(min_rdir, N);
- }
-
- template<int K>
- __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
- {
- max_dist = reduce_max(ray_tfar);
- }
-
- NearFarPrecalculations nf;
-
- Vec3fa min_rdir;
- Vec3fa max_rdir;
-
- Vec3fa min_org;
- Vec3fa max_org;
-
- float min_dist;
- float max_dist;
- };
-
- typedef Frustum<true> FrustumRobust;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx>
- __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
- const FrustumFast& frustum, vfloat<Nx>& dist)
- {
- const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
- const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
- const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
- const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
- const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
- const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
-
-#if defined (__aarch64__)
- const vfloat<Nx> fminX = madd(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.neg_min_org_rdir.x));
- const vfloat<Nx> fminY = madd(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.neg_min_org_rdir.y));
- const vfloat<Nx> fminZ = madd(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.neg_min_org_rdir.z));
- const vfloat<Nx> fmaxX = madd(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.neg_max_org_rdir.x));
- const vfloat<Nx> fmaxY = madd(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.neg_max_org_rdir.y));
- const vfloat<Nx> fmaxZ = madd(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.neg_max_org_rdir.z));
-#else
- const vfloat<Nx> fminX = msub(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.min_org_rdir.x));
- const vfloat<Nx> fminY = msub(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.min_org_rdir.y));
- const vfloat<Nx> fminZ = msub(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.min_org_rdir.z));
- const vfloat<Nx> fmaxX = msub(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.max_org_rdir.x));
- const vfloat<Nx> fmaxY = msub(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.max_org_rdir.y));
- const vfloat<Nx> fmaxZ = msub(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.max_org_rdir.z));
-#endif
- const vfloat<Nx> fmin = maxi(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist));
- dist = fmin;
- const vfloat<Nx> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist));
- const vbool<Nx> vmask_node_hit = fmin <= fmax;
- size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
- return m_node;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx>
- __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
- const FrustumRobust& frustum, vfloat<Nx>& dist)
- {
- const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
- const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
- const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
- const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
- const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
- const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
-
- const vfloat<Nx> fminX = (bminX - vfloat<Nx>(frustum.min_org.x)) * vfloat<Nx>(frustum.min_rdir.x);
- const vfloat<Nx> fminY = (bminY - vfloat<Nx>(frustum.min_org.y)) * vfloat<Nx>(frustum.min_rdir.y);
- const vfloat<Nx> fminZ = (bminZ - vfloat<Nx>(frustum.min_org.z)) * vfloat<Nx>(frustum.min_rdir.z);
- const vfloat<Nx> fmaxX = (bmaxX - vfloat<Nx>(frustum.max_org.x)) * vfloat<Nx>(frustum.max_rdir.x);
- const vfloat<Nx> fmaxY = (bmaxY - vfloat<Nx>(frustum.max_org.y)) * vfloat<Nx>(frustum.max_rdir.y);
- const vfloat<Nx> fmaxZ = (bmaxZ - vfloat<Nx>(frustum.max_org.z)) * vfloat<Nx>(frustum.max_rdir.z);
-
- const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
- const float round_up = 1.0f+2.0f*float(ulp);
- const vfloat<Nx> fmin = max(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist));
- dist = fmin;
- const vfloat<Nx> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist));
- const vbool<Nx> vmask_node_hit = (round_down*fmin <= round_up*fmax);
- size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
- return m_node;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h
deleted file mode 100644
index 0543e56f8e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h
+++ /dev/null
@@ -1,843 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray packet structure used in hybrid traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int K, bool robust>
- struct TravRayK;
-
- /* Fast variant */
- template<int K>
- struct TravRayK<K, false>
- {
- __forceinline TravRayK() {}
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- init(ray_org, ray_dir, N);
- }
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(ray_org, ray_dir, N);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- org = ray_org;
- dir = ray_dir;
- rdir = rcp_safe(ray_dir);
-#if defined(__aarch64__)
- neg_org_rdir = -(org * rdir);
-#elif defined(__AVX2__)
- org_rdir = org * rdir;
-#endif
- if (N)
- {
- const int size = sizeof(float)*N;
- nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
- nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
- nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
- }
- }
-
- Vec3vf<K> org;
- Vec3vf<K> dir;
- Vec3vf<K> rdir;
-#if defined(__aarch64__)
- Vec3vf<K> neg_org_rdir;
-#elif defined(__AVX2__)
- Vec3vf<K> org_rdir;
-#endif
- Vec3vi<K> nearXYZ;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKFast = TravRayK<K, false>;
-
- /* Robust variant */
- template<int K>
- struct TravRayK<K, true>
- {
- __forceinline TravRayK() {}
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- init(ray_org, ray_dir, N);
- }
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(ray_org, ray_dir, N);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- org = ray_org;
- dir = ray_dir;
- rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
-
- if (N)
- {
- const int size = sizeof(float)*N;
- nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
- nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
- nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
- }
- }
-
- Vec3vf<K> org;
- Vec3vf<K> dir;
- Vec3vf<K> rdir;
- Vec3vi<K> nearXYZ;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKRobust = TravRayK<K, true>;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
- const TravRayKFast<K>& ray, vfloat<K>& dist)
-
- {
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
- #else
- const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
- #endif
-
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
- #endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- #else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- #endif
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
- const TravRayKRobust<K>& ray, vfloat<K>& dist)
- {
- // FIXME: use per instruction rounding for AVX512
- const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
- const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
-#endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
-#else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
-#endif
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
- else
-#endif
- {
- const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
-
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-#endif
-
- const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
- vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
- }
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
-
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
- const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
- vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
-
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
- }
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K, bool robust>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
- const TravRayK<K,robust>& ray, vfloat<K>& dist)
- {
- const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
- Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
- Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
- Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
-
- const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
- const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
- const Vec3vf<K> org = xfmPoint(naabb, ray.org);
-
- const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
- const vfloat<K> lclipMinY = org.y * nrdir.y;
- const vfloat<K> lclipMinZ = org.z * nrdir.z;
- const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
- const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
- const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
-
- vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- if (robust) {
- lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
- lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
- }
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K, bool robust>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
- const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
- Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
- Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
- Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
-
- const Vec3vf<K> b0_lower = zero;
- const Vec3vf<K> b0_upper = one;
- const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
- const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
- const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
- const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
-
- const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
- const Vec3vf<K> rdir = rcp_safe(dir);
- const Vec3vf<K> org = xfmPoint(xfm, ray.org);
-
- const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
- const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
- const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
- const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
- const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
- const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
-
- vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- if (robust) {
- lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
- lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
- }
-
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // QuantizedBaseNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
- const TravRayK<K,false>& ray, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
- const vfloat<N> lower_x = node->dequantizeLowerX();
- const vfloat<N> upper_x = node->dequantizeUpperX();
- const vfloat<N> lower_y = node->dequantizeLowerY();
- const vfloat<N> upper_y = node->dequantizeUpperY();
- const vfloat<N> lower_z = node->dequantizeLowerZ();
- const vfloat<N> upper_z = node->dequantizeUpperZ();
-
- #if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- #elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
- #else
- const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
- #endif
-
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
- #endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- #else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- #endif
- dist = lnearP;
- return lhit;
- }
- }
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
- const TravRayK<K,true>& ray, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
- const vfloat<N> lower_x = node->dequantizeLowerX();
- const vfloat<N> upper_x = node->dequantizeUpperX();
- const vfloat<N> lower_y = node->dequantizeLowerY();
- const vfloat<N> upper_y = node->dequantizeUpperY();
- const vfloat<N> lower_z = node->dequantizeLowerZ();
- const vfloat<N> upper_z = node->dequantizeUpperZ();
-
- const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
- const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
-
- const vfloat<K> lower_x = node->dequantizeLowerX(i,time);
- const vfloat<K> upper_x = node->dequantizeUpperX(i,time);
- const vfloat<K> lower_y = node->dequantizeLowerY(i,time);
- const vfloat<K> upper_y = node->dequantizeUpperY(i,time);
- const vfloat<K> lower_z = node->dequantizeLowerZ(i,time);
- const vfloat<K> upper_z = node->dequantizeUpperZ(i,time);
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
- #endif
- const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
-
- const vfloat<K> lower_x = node->dequantizeLowerX(i,time);
- const vfloat<K> upper_x = node->dequantizeUpperX(i,time);
- const vfloat<K> lower_y = node->dequantizeLowerY(i,time);
- const vfloat<K> upper_y = node->dequantizeUpperY(i,time);
- const vfloat<K> lower_z = node->dequantizeLowerZ(i,time);
- const vfloat<K> upper_z = node->dequantizeUpperZ(i,time);
-
- const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
- const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in hybrid traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Intersects N nodes with K rays */
- template<int N, int K, int types, bool robust>
- struct BVHNNodeIntersectorK;
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
- {
- /* vmask is both an input and an output parameter! Its initial value should be the parent node
- hit mask, which is used for correctly computing the current hit mask. The parent hit mask
- is actually required only for motion blur node intersections (because different rays may
- have different times), so for regular nodes vmask is simply overwritten. */
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
- else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
- else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
- vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
- } else /*if (unlikely(node.isOBBNodeMB()))*/ {
- assert(node.isOBBNodeMB());
- vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- }
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
- vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
- } else /*if (unlikely(node.isOBBNodeMB()))*/ {
- assert(node.isOBBNodeMB());
- vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- }
- return true;
- }
- };
-
-
- /*! Intersects N nodes with K rays */
- template<int N, int K, bool robust>
- struct BVHNQuantizedBaseNodeIntersectorK;
-
- template<int N, int K>
- struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
- {
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
- const TravRayK<K,false>& ray, vfloat<K>& dist)
- {
- return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
- }
-
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
- }
-
- };
-
- template<int N, int K>
- struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
- {
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
- const TravRayK<K,true>& ray, vfloat<K>& dist)
- {
- return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
- }
-
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
- }
- };
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h
deleted file mode 100644
index f379b57aea..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray packet structure used in stream traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int K, bool robust>
- struct TravRayKStream;
-
- /* Fast variant */
- template<int K>
- struct TravRayKStream<K, false>
- {
- __forceinline TravRayKStream() {}
-
- __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
- {
- init(ray_org, ray_dir);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
- {
- rdir = rcp_safe(ray_dir);
-#if defined(__aarch64__)
- neg_org_rdir = -(ray_org * rdir);
-#else
- org_rdir = ray_org * rdir;
-#endif
- }
-
- Vec3vf<K> rdir;
-#if defined(__aarch64__)
- Vec3vf<K> neg_org_rdir;
-#else
- Vec3vf<K> org_rdir;
-#endif
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKStreamFast = TravRayKStream<K, false>;
-
- /* Robust variant */
- template<int K>
- struct TravRayKStream<K, true>
- {
- __forceinline TravRayKStream() {}
-
- __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
- {
- init(ray_org, ray_dir);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
- {
- rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
- org = ray_org;
- }
-
- Vec3vf<K> rdir;
- Vec3vf<K> org;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKStreamRobust = TravRayKStream<K, true>;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, int K>
- __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
- const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
-
-#if defined (__aarch64__)
- const vfloat<Nx> rminX = madd(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k]));
- const vfloat<Nx> rminY = madd(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k]));
- const vfloat<Nx> rminZ = madd(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k]));
- const vfloat<Nx> rmaxX = madd(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k]));
- const vfloat<Nx> rmaxY = madd(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k]));
- const vfloat<Nx> rmaxZ = madd(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k]));
-#else
- const vfloat<Nx> rminX = msub(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k]));
- const vfloat<Nx> rminY = msub(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k]));
- const vfloat<Nx> rminZ = msub(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k]));
- const vfloat<Nx> rmaxX = msub(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k]));
- const vfloat<Nx> rmaxY = msub(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k]));
- const vfloat<Nx> rmaxZ = msub(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k]));
-#endif
- const vfloat<Nx> rmin = maxi(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k]));
- const vfloat<Nx> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k]));
-
- const vbool<Nx> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
- }
-
- template<int N, int K>
- __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
- const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
- {
- char* ptr = (char*)&node->lower_x + i*sizeof(float);
- const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
- const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
- const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
- const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
- const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
- const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
-
-#if defined (__aarch64__)
- const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
-#endif
-
- const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
- const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
-
- const vbool<K> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, int K>
- __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
- const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
-
- const vfloat<Nx> rminX = (bminX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]);
- const vfloat<Nx> rminY = (bminY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]);
- const vfloat<Nx> rminZ = (bminZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]);
- const vfloat<Nx> rmaxX = (bmaxX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]);
- const vfloat<Nx> rmaxY = (bmaxY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]);
- const vfloat<Nx> rmaxZ = (bmaxZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]);
- const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
- const vfloat<Nx> rmin = max(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k]));
- const vfloat<Nx> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k]));
-
- const vbool<Nx> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
- }
-
- template<int N, int K>
- __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
- const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
- {
- char *ptr = (char*)&node->lower_x + i*sizeof(float);
- const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
- const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
- const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
- const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
- const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
- const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
-
- const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
- const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
- const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
- const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
- const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
- const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
- const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
-
- const vbool<K> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit);
- }
- }
-}