summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h
blob: 883cedc3d2a4dbb51a89b856fc2265f860d3a5cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../common/ray.h"
#include "cylinder.h"
#include "plane.h"
#include "line_intersector.h"
#include "curve_intersector_precalculations.h"

namespace embree
{
  namespace isa
  {
    static const size_t numJacobianIterations = 5;
#if defined(__AVX__)
    static const size_t numBezierSubdivisions = 2;
#else
    static const size_t numBezierSubdivisions = 3;
#endif

    struct BezierCurveHit
    {
      __forceinline BezierCurveHit() {}

      __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
        : t(t), u(u), v(0.0f), Ng(Ng) {}

      __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
        : t(t), u(u), v(v), Ng(Ng) {}
      
      __forceinline void finalize() {}
      
    public:
      float t;
      float u;
      float v; 
      Vec3fa Ng;
    };
    
    template<typename NativeCurve3ff, typename Ray, typename Epilog>
    __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
                                                        const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, 
                                                        const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
                                                        const Epilog& epilog)
    {
      if (tp.lower[i]+dt > ray.tfar) return false;
      Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
      if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
      if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
      BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
      return epilog(hit);
    }

    template<typename NativeCurve3ff, typename Ray, typename Epilog> 
     __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
    {
      const Vec3fa org = zero;
      const Vec3fa dir = ray.dir;
      const float length_ray_dir = length(dir);

      /* error of curve evaluations is propertional to largest coordinate */
      const BBox3ff box = curve.bounds();
      const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
     
      for (size_t i=0; i<numJacobianIterations; i++) 
      {
        const Vec3fa Q = madd(Vec3fa(t),dir,org);
        //const Vec3fa dQdu = zero;
        const Vec3fa dQdt = dir;
        const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
           
        Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
        //const Vec3fa dPdt = zero;

        const Vec3fa R = Q-P;
        const float len_R = length(R); //reduce_max(abs(R));
        const float R_err = max(Q_err,P_err);
        const Vec3fa dRdu = /*dQdu*/-dPdu;
        const Vec3fa dRdt = dQdt;//-dPdt;

        const Vec3fa T = normalize(dPdu);
        const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
        //const Vec3fa dTdt = zero;
        const float cos_err = P_err/length(dPdu);

        /* Error estimate for dot(R,T):

           dot(R,T) = cos(R,T) |R| |T|
                    = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
                    = cos(R,T)*|R|*|T| 
                      +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
                      +- cos_error*(|R| + |T|)
                      +- lower order terms
           with cos(R,T) being in [0,1] and |T| = 1 we get:
             dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
        */
              
        const float f = dot(R,T);
        const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
        const float dfdu = dot(dRdu,T) + dot(R,dTdu);
        const float dfdt = dot(dRdt,T);// + dot(R,dTdt);

        const float K = dot(R,R)-sqr(f);
        const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
        const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
        const float rsqrt_K = rsqrt(K);

        const float g = sqrt(K)-P.w;
        const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
        const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
        const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;

        const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
        const Vec2f dut = rcp(J)*Vec2f(f,g);
        const Vec2f ut = Vec2f(u,t) - dut;
        u = ut.x; t = ut.y;

        if (abs(f) < f_err && abs(g) < g_err)
        {
          t+=dt;
          if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
          if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
          const Vec3fa R = normalize(Q-P);
          const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
          const Vec3fa V = cross(dPdu,R);
          BezierCurveHit hit(t,u,cross(V,U));
          return epilog(hit);
        }
      }
      return false;
    }

    template<typename NativeCurve3ff, typename Ray, typename Epilog>
    bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
                                             float u0, float u1, unsigned int depth, const Epilog& epilog)
    {
#if defined(__AVX__)
      typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
      typedef vint8 vintx; 
      typedef vfloat8 vfloatx;
#else
      typedef vbool4 vboolx;
      typedef vint4 vintx; 
      typedef vfloat4 vfloatx;
#endif
      typedef Vec3<vfloatx> Vec3vfx;
      typedef Vec4<vfloatx> Vec4vfx;
    
      unsigned int maxDepth = numBezierSubdivisions;
      bool found = false;
      const Vec3fa org = zero;
      const Vec3fa dir = ray.dir;

      unsigned int sptr = 0;
      const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
      struct StackEntry {
        vboolx valid;
        vfloatx tlower;
        float u0;
        float u1;
        unsigned int depth;
      };
      StackEntry stack[stack_size];
      goto entry;

       /* terminate if stack is empty */
      while (sptr)
      {
        /* pop from stack */
        {
          sptr--;
          vboolx valid = stack[sptr].valid;
          const vfloatx tlower = stack[sptr].tlower;
          valid &= tlower+dt <= ray.tfar;
          if (none(valid)) continue;
          u0 = stack[sptr].u0;
          u1 = stack[sptr].u1;
          depth = stack[sptr].depth;
          const size_t i = select_min(valid,tlower); clear(valid,i);
          stack[sptr].valid = valid;
          if (any(valid)) sptr++; // there are still items on the stack

          /* process next segment */
          const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
          u0 = vu0[i+0];
          u1 = vu0[i+1];
        }
      entry:

        /* subdivide curve */
        const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
        const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
        Vec4vfx P0, dP0du; curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
        const Vec4vfx P3 = shift_right_1(P0);
        const Vec4vfx dP3du = shift_right_1(dP0du); 
        const Vec4vfx P1 = P0 + dP0du; 
        const Vec4vfx P2 = P3 - dP3du;
        
        /* calculate bounding cylinders */
        const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
        const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
        const vfloatx maxr12 = sqrt(max(rr1,rr2));
        const vfloatx one_plus_ulp  = 1.0f+2.0f*float(ulp);
        const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
        vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
        vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
        r_outer = one_plus_ulp*r_outer;
        r_inner = max(0.0f,one_minus_ulp*r_inner);
        const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
        const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
        vboolx valid = true; clear(valid,vfloatx::size-1);
        
        /* intersect with outer cylinder */
        BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
        valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
        if (none(valid)) continue;
        
        /* intersect with cap-planes */
        BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
        tp = embree::intersect(tp,tc_outer);
        BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
        tp = embree::intersect(tp,h0);
        BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
        tp = embree::intersect(tp,h1);
        valid &= tp.lower <= tp.upper;
        if (none(valid)) continue;
        
        /* clamp and correct u parameter */
        u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
        u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
        u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
        u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
        
        /* intersect with inner cylinder */
        BBox<vfloatx> tc_inner;
        vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
        const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
        
        /* at the unstable area we subdivide deeper */
        const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);
        const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
      
        /* subtract the inner interval from the current hit interval */
        BBox<vfloatx> tp0, tp1;
        subtract(tp,tc_inner,tp0,tp1);
        vboolx valid0 = valid & (tp0.lower <= tp0.upper);
        vboolx valid1 = valid & (tp1.lower <= tp1.upper);
        if (none(valid0 | valid1)) continue;
        
        /* iterate over all first hits front to back */
        const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));
        vboolx recursion_valid0 = valid0 & (depth < termDepth0);
        valid0 &= depth >= termDepth0;
        
        while (any(valid0))
        {
          const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
          found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
          //found = found | intersect_bezier_iterative_debug   (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
          valid0 &= tp0.lower+dt <= ray.tfar;
        }
        valid1 &= tp1.lower+dt <= ray.tfar;
        
        /* iterate over all second hits front to back */
        const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
        vboolx recursion_valid1 = valid1 & (depth < termDepth1);
        valid1 &= depth >= termDepth1;
        while (any(valid1))
        {
          const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
          found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
          //found = found | intersect_bezier_iterative_debug   (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
          valid1 &= tp1.lower+dt <= ray.tfar;
        }

        /* push valid segments to stack */
        recursion_valid0 &= tp0.lower+dt <= ray.tfar;
        recursion_valid1 &= tp1.lower+dt <= ray.tfar;
        const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
        if (any(recursion_valid))
        {
          assert(sptr < stack_size);
          stack[sptr].valid = recursion_valid;
          stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
          stack[sptr].u0 = u0;
          stack[sptr].u1 = u1;
          stack[sptr].depth = depth+1;
          sptr++;
        }
      }
      return found;
    }

    template<template<typename Ty> class NativeCurve>
    struct SweepCurve1Intersector1
    {
      typedef NativeCurve<Vec3ff> NativeCurve3ff;
      
      template<typename Epilog>
      __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                                IntersectContext* context,
                                const CurveGeometry* geom, const unsigned int primID,
                                const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                                const Epilog& epilog)
      {
        STAT3(normal.trav_prims,1,1,1);

        /* move ray closer to make intersection stable */
        NativeCurve3ff curve0(v0,v1,v2,v3);
        curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
        const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
        const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
        const NativeCurve3ff curve1 = curve0-ref;
        return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
      }
    };

    template<template<typename Ty> class NativeCurve, int K>
    struct SweepCurve1IntersectorK
    {
      typedef NativeCurve<Vec3ff> NativeCurve3ff;
      
      struct Ray1
      {
        __forceinline Ray1(RayK<K>& ray, size_t k)
          : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}

        Vec3fa org;
        Vec3fa dir;
        float _tnear;
        float& tfar;

        __forceinline float& tnear() { return _tnear; }
        //__forceinline float& tfar()  { return _tfar; }
        __forceinline const float& tnear() const { return _tnear; }
        //__forceinline const float& tfar()  const { return _tfar; }
        
      };

      template<typename Epilog>
      __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
                                   IntersectContext* context,
                                   const CurveGeometry* geom, const unsigned int primID,
                                   const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                                   const Epilog& epilog)
      {
        STAT3(normal.trav_prims,1,1,1);
        Ray1 ray(vray,k);

        /* move ray closer to make intersection stable */
        NativeCurve3ff curve0(v0,v1,v2,v3);
        curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
        const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
        const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
        const NativeCurve3ff curve1 = curve0-ref;
        return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
      }
    };
  }
}