// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

/* include all node types */
#include "bvh_node_aabb.h"
#include "bvh_node_aabb_mb.h"
#include "bvh_node_aabb_mb4d.h"
#include "bvh_node_obb.h"
#include "bvh_node_obb_mb.h"
#include "bvh_node_qaabb.h"

namespace embree
{
  /*! flags used to enable specific node types in intersectors */
  enum BVHNodeFlags
  {
    BVH_FLAG_ALIGNED_NODE = 0x00001,
    BVH_FLAG_ALIGNED_NODE_MB = 0x00010,
    BVH_FLAG_UNALIGNED_NODE = 0x00100,
    BVH_FLAG_UNALIGNED_NODE_MB = 0x01000,
    BVH_FLAG_QUANTIZED_NODE = 0x100000,
    BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000,
    
    /* short versions */
    BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
    BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
    BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
    BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
    BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
    BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
    BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
  };
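  /* Each flag above occupies a single bit in its own hex digit, so the short
   * versions are simply bitwise ORs of the individual flags, e.g.
   * BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D. */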
  
  /*! Multi BVH with N children. Each node stores the bounding box of
   * its N children as well as N child references. */
  template<int N>
    class BVHN : public AccelData
  {
    ALIGNED_CLASS_(16);
  public:
    
    /*! shortcuts for the node reference and node types */
    typedef NodeRefPtr<N> NodeRef;
    typedef BaseNode_t<NodeRef,N> BaseNode;
    typedef AABBNode_t<NodeRef,N> AABBNode;
    typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
    typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
    typedef OBBNode_t<NodeRef,N> OBBNode;
    typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
    typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
    typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
    typedef QuantizedNode_t<NodeRef,N> QuantizedNode;
    
    /*! Number of bytes the nodes and primitives are minimally aligned to. */
    static const size_t byteAlignment = 16;
    static const size_t byteNodeAlignment = 4*N;
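    /* i.e. nodes are aligned to 16 bytes for BVH4 (N=4) and 32 bytes for BVH8 (N=8) */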
    
    /*! Empty node */
    static const size_t emptyNode = NodeRef::emptyNode;
    
    /*! Invalid node, used as marker in traversal */
    static const size_t invalidNode = NodeRef::invalidNode;
    static const size_t popRay      = NodeRef::popRay;
    
    /*! Maximum depth of the BVH. */
    static const size_t maxBuildDepth = 32;
    static const size_t maxBuildDepthLeaf = maxBuildDepth+8;
    static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder
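    /* with the defaults above: maxBuildDepthLeaf = 40 and maxDepth = 80 */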
    
    /*! Maximum number of primitive blocks in a leaf. */
    static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;
    
  public:
    
    /*! Builder interface to create allocator */
    struct CreateAlloc : public FastAllocator::Create {
      __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
    };

    typedef BVHNodeRecord<NodeRef>     NodeRecord;
    typedef BVHNodeRecordMB<NodeRef>   NodeRecordMB;
    typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
    
  public:
    
    /*! BVHN default constructor. */
    BVHN (const PrimitiveType& primTy, Scene* scene);
    
    /*! BVHN destruction */
    ~BVHN ();
    
    /*! clears the acceleration structure */
    void clear();
    
    /*! sets BVH members after build */
    void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);
    
    /*! Clears the barrier bits of a subtree. */
    void clearBarrier(NodeRef& node);
    
    /*! lays out num large nodes of the BVH */
    void layoutLargeNodes(size_t num);
    NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);
    
    /*! called by all builders before build starts */
    double preBuild(const std::string& builderName);
    
    /*! called by all builders after build ended */
    void postBuild(double t0);
    
    /*! allocator class */
    struct Allocator {
      BVHN* bvh;
      Allocator (BVHN* bvh) : bvh(bvh) {}
      __forceinline void* operator() (size_t bytes) const { 
        return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes); 
      }
    };
    
    /*! post build cleanup */
    void cleanup() {
      alloc.cleanup();
    }
    
  public:
    
    /*! Encodes a node */
    static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
    static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }
    
  public:
    
    /*! Prefetches the node this reference points to */
    __forceinline static void prefetch(const NodeRef ref, int types=0)
    {
#if defined(__AVX512PF__) // MIC
      if (types != BVH_FLAG_QUANTIZED_NODE) {
        prefetchL2(((char*)ref.ptr)+0*64);
        prefetchL2(((char*)ref.ptr)+1*64);
        if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
          prefetchL2(((char*)ref.ptr)+2*64);
          prefetchL2(((char*)ref.ptr)+3*64);
        }
        if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
          /* KNL still needs L2 prefetches for large nodes */
          prefetchL2(((char*)ref.ptr)+4*64);
          prefetchL2(((char*)ref.ptr)+5*64);
          prefetchL2(((char*)ref.ptr)+6*64);
          prefetchL2(((char*)ref.ptr)+7*64);
        }
      }
      else
      {
        /* todo: reduce if 32bit offsets are enabled */
        prefetchL2(((char*)ref.ptr)+0*64);
        prefetchL2(((char*)ref.ptr)+1*64);
        prefetchL2(((char*)ref.ptr)+2*64);
      }
#else
      if (types != BVH_FLAG_QUANTIZED_NODE) {
        prefetchL1(((char*)ref.ptr)+0*64);
        prefetchL1(((char*)ref.ptr)+1*64);
        if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
          prefetchL1(((char*)ref.ptr)+2*64);
          prefetchL1(((char*)ref.ptr)+3*64);
        }
        if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
          /* deactivate for large nodes on Xeon, as it introduces regressions */
          //prefetchL1(((char*)ref.ptr)+4*64);
          //prefetchL1(((char*)ref.ptr)+5*64);
          //prefetchL1(((char*)ref.ptr)+6*64);
          //prefetchL1(((char*)ref.ptr)+7*64);
        }
      }
      else
      {
        /* todo: reduce if 32bit offsets are enabled */
        prefetchL1(((char*)ref.ptr)+0*64);
        prefetchL1(((char*)ref.ptr)+1*64);
        prefetchL1(((char*)ref.ptr)+2*64);
      }
#endif
    }
    
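    /*! Prefetches the node this reference points to with write intent
     *  (prefetchEX), for code paths that are going to modify the node */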
    __forceinline static void prefetchW(const NodeRef ref, int types=0)
    {
      embree::prefetchEX(((char*)ref.ptr)+0*64);
      embree::prefetchEX(((char*)ref.ptr)+1*64);
      if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
        embree::prefetchEX(((char*)ref.ptr)+2*64);
        embree::prefetchEX(((char*)ref.ptr)+3*64);
      }
      if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
        embree::prefetchEX(((char*)ref.ptr)+4*64);
        embree::prefetchEX(((char*)ref.ptr)+5*64);
        embree::prefetchEX(((char*)ref.ptr)+6*64);
        embree::prefetchEX(((char*)ref.ptr)+7*64);
      }
    }
    
    /*! bvh type information */
  public:
    const PrimitiveType* primTy;       //!< primitive type stored in the BVH
    
    /*! bvh data */
  public:
    Device* device;                    //!< device pointer
    Scene* scene;                      //!< scene pointer
    NodeRef root;                      //!< root node
    FastAllocator alloc;               //!< allocator used to allocate nodes
    
    /*! statistics data */
  public:
    size_t numPrimitives;              //!< number of primitives the BVH is built over
    size_t numVertices;                //!< number of vertices the BVH references
    
    /*! data arrays for special builders */
  public:
    std::vector<BVHN*> objects;
    vector_t<char,aligned_allocator<char,32>> subdiv_patches;
  };
  
  typedef BVHN<4> BVH4;
  typedef BVHN<8> BVH8;
}