summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h
blob: ede0d04c78ab56d5190889e17fd3e601ba2b2f28 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "priminfo.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_partition.h"

namespace embree
{
  namespace isa
  { 
    /*! Performs standard object binning */
    struct HeuristicStrandSplit
    {
      typedef range<size_t> Set;
  
      static const size_t PARALLEL_THRESHOLD = 10000;
      static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
      static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;

      /*! stores all information to perform some split */
      struct Split
      {    
	/*! construct an invalid split by default */
	__forceinline Split()
	  : sah(inf), axis0(zero), axis1(zero) {}
	
	/*! constructs specified split */
	__forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
	  : sah(sah), axis0(axis0), axis1(axis1) {}
	
	/*! calculates standard surface area heuristic for the split */
	__forceinline float splitSAH() const { return sah; }

        /*! test if this split is valid */
        __forceinline bool valid() const { return sah != float(inf); }
		
      public:
	float sah;             //!< SAH cost of the split
	Vec3fa axis0, axis1;   //!< axis the two strands are aligned into
      };

      __forceinline HeuristicStrandSplit () // FIXME: required?
        : scene(nullptr), prims(nullptr) {}
      
      /*! remember prim array */
      __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
        : scene(scene), prims(prims) {}
      
      __forceinline const Vec3fa direction(const PrimRef& prim) {
        return scene->get(prim.geomID())->computeDirection(prim.primID());
      }
      
      __forceinline const BBox3fa bounds(const PrimRef& prim) {
        return scene->get(prim.geomID())->vbounds(prim.primID());
      }

      __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
        return scene->get(prim.geomID())->vbounds(space,prim.primID());
      }

      /*! finds the best split */
      const Split find(const range<size_t>& set, size_t logBlockSize)
      {
        Vec3fa axis0(0,0,1);
        uint64_t bestGeomPrimID = -1;

        /* curve with minimum ID determines first axis */
        for (size_t i=set.begin(); i<set.end(); i++)
        {
          const uint64_t geomprimID = prims[i].ID64();
          if (geomprimID >= bestGeomPrimID) continue;
          const Vec3fa axis = direction(prims[i]);
          if (sqr_length(axis) > 1E-18f) {
            axis0 = normalize(axis);
            bestGeomPrimID = geomprimID;
          }
        }
      
        /* find 2nd axis that is most misaligned with first axis and has minimum ID */
        float bestCos = 1.0f;
        Vec3fa axis1 = axis0;
        bestGeomPrimID = -1;
        for (size_t i=set.begin(); i<set.end(); i++) 
        {
          const uint64_t geomprimID = prims[i].ID64();
          Vec3fa axisi = direction(prims[i]);
          float leni = length(axisi);
          if (leni == 0.0f) continue;
          axisi /= leni;
          float cos = abs(dot(axisi,axis0));
          if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
            bestCos = cos; axis1 = axisi;
            bestGeomPrimID = geomprimID;
          }
        }
      
        /* partition the two strands */
        size_t lnum = 0, rnum = 0;
        BBox3fa lbounds = empty, rbounds = empty;
        const LinearSpace3fa space0 = frame(axis0).transposed();
        const LinearSpace3fa space1 = frame(axis1).transposed();
        
        for (size_t i=set.begin(); i<set.end(); i++)
        {
          PrimRef& prim = prims[i];
          const Vec3fa axisi = normalize(direction(prim));
          const float cos0 = abs(dot(axisi,axis0));
          const float cos1 = abs(dot(axisi,axis1));
          
          if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
          else             { rnum++; rbounds.extend(bounds(space1,prim)); }
        }
      
        /*! return an invalid split if we do not partition */
        if (lnum == 0 || rnum == 0) 
          return Split(inf,axis0,axis1);
      
        /*! calculate sah for the split */
        const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
        const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
        const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
        return Split(sah,axis0,axis1);
      }

      /*! array partitioning */
      void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset) 
      {
        if (!split.valid()) {
          deterministic_order(set);
          return splitFallback(set,lset,rset);
        }
        
        const size_t begin = set.begin();
        const size_t end   = set.end();
        CentGeomBBox3fa local_left(empty);
        CentGeomBBox3fa local_right(empty);

        auto primOnLeftSide = [&] (const PrimRef& prim) -> bool { 
          const Vec3fa axisi = normalize(direction(prim));
          const float cos0 = abs(dot(axisi,split.axis0));
          const float cos1 = abs(dot(axisi,split.axis1));
          return cos0 > cos1;
        };

        auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { 
          pinfo.extend(bounds(ref)); 
        };
        
        size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
        
        new (&lset) PrimInfoRange(begin,center,local_left);
        new (&rset) PrimInfoRange(center,end,local_right);
        assert(area(lset.geomBounds) >= 0.0f);
        assert(area(rset.geomBounds) >= 0.0f);
      }

      void deterministic_order(const Set& set) 
      {
        /* required as parallel partition destroys original primitive order */
        std::sort(&prims[set.begin()],&prims[set.end()]);
      }
      
      void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
      {
        const size_t begin = set.begin();
        const size_t end   = set.end();
        const size_t center = (begin + end)/2;
        
        CentGeomBBox3fa left(empty);
        for (size_t i=begin; i<center; i++)
          left.extend(bounds(prims[i]));
        new (&lset) PrimInfoRange(begin,center,left);
        
        CentGeomBBox3fa right(empty);
        for (size_t i=center; i<end; i++)
          right.extend(bounds(prims[i]));	
        new (&rset) PrimInfoRange(center,end,right);
      }
      
    private:
      Scene* const scene;
      PrimRef* const prims;
    };
  }
}