summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/kernels')
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h411
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h501
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h692
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h526
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h669
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h972
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h205
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h302
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h443
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h414
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h552
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h188
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h237
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/priminfo.h362
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp244
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen.h28
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h371
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/splitter.h169
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh.cpp190
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh.h235
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp1325
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h316
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp1165
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h280
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp60
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h114
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp531
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp640
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp705
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp201
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp377
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h263
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h267
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp375
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h72
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp330
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h37
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp61
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h61
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h295
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h41
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h213
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h247
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h107
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h43
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h98
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h90
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h265
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h242
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp247
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h95
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp127
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h37
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp168
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h285
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h676
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h154
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector.h31
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h1788
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h269
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h843
-rw-r--r--thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h215
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accel.h556
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelinstance.h41
-rw-r--r--thirdparty/embree-aarch64/kernels/common/acceln.cpp232
-rw-r--r--thirdparty/embree-aarch64/kernels/common/acceln.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelset.cpp17
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelset.h248
-rw-r--r--thirdparty/embree-aarch64/kernels/common/alloc.cpp82
-rw-r--r--thirdparty/embree-aarch64/kernels/common/alloc.h1006
-rw-r--r--thirdparty/embree-aarch64/kernels/common/buffer.h263
-rw-r--r--thirdparty/embree-aarch64/kernels/common/builder.h60
-rw-r--r--thirdparty/embree-aarch64/kernels/common/context.h131
-rw-r--r--thirdparty/embree-aarch64/kernels/common/default.h273
-rw-r--r--thirdparty/embree-aarch64/kernels/common/device.cpp567
-rw-r--r--thirdparty/embree-aarch64/kernels/common/device.h85
-rw-r--r--thirdparty/embree-aarch64/kernels/common/geometry.cpp259
-rw-r--r--thirdparty/embree-aarch64/kernels/common/geometry.h582
-rw-r--r--thirdparty/embree-aarch64/kernels/common/hit.h114
-rw-r--r--thirdparty/embree-aarch64/kernels/common/instance_stack.h199
-rw-r--r--thirdparty/embree-aarch64/kernels/common/isa.h271
-rw-r--r--thirdparty/embree-aarch64/kernels/common/motion_derivative.h325
-rw-r--r--thirdparty/embree-aarch64/kernels/common/point_query.h136
-rw-r--r--thirdparty/embree-aarch64/kernels/common/primref.h138
-rw-r--r--thirdparty/embree-aarch64/kernels/common/primref_mb.h262
-rw-r--r--thirdparty/embree-aarch64/kernels/common/profile.h159
-rw-r--r--thirdparty/embree-aarch64/kernels/common/ray.h1517
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore.cpp1799
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore.h142
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp442
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene.cpp976
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene.h390
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_curves.h341
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h215
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_instance.h272
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_line_segments.h307
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_points.h282
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h277
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h326
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp243
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h264
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h77
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stack_item.h125
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stat.cpp128
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stat.h116
-rw-r--r--thirdparty/embree-aarch64/kernels/common/state.cpp543
-rw-r--r--thirdparty/embree-aarch64/kernels/common/state.h197
-rw-r--r--thirdparty/embree-aarch64/kernels/common/vector.h76
-rw-r--r--thirdparty/embree-aarch64/kernels/config.h76
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/cone.h321
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h209
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h141
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi.h222
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h569
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h278
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h516
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNv.h101
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h181
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h98
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h129
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h417
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h214
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h362
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h671
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h22
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/cylinder.h223
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h216
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h277
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/filter.h204
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h99
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa.h275
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h207
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h445
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/instance.h78
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h84
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h1074
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h172
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/line_intersector.h141
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/linei.h709
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h124
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/object.h84
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/object_intersector.h127
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/plane.h57
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/pointi.h417
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/primitive.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp379
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h76
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h566
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h529
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadi.h483
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h350
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadv.h165
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h181
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h710
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h136
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h183
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h156
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h38
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h237
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid.h517
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h518
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h493
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h508
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h236
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle.h162
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h96
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h403
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h247
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h418
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h132
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglei.h442
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h336
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev.h157
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h206
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h201
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h211
-rw-r--r--thirdparty/embree-aarch64/kernels/hash.h5
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h669
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h372
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h191
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h319
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h449
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h85
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h562
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h826
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h296
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h226
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h359
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h186
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h893
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h113
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/gridrange.h96
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/half_edge.h371
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h38
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h403
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/patch.h371
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h129
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h245
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h127
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h156
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/tessellation.h161
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h325
209 files changed, 0 insertions, 63786 deletions
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h
deleted file mode 100644
index 755ce255fb..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h
+++ /dev/null
@@ -1,411 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../geometry/primitive.h"
-#include "../builders/bvh_builder_sah.h"
-#include "../builders/heuristic_binning_array_aligned.h"
-#include "../builders/heuristic_binning_array_unaligned.h"
-#include "../builders/heuristic_strand_array.h"
-
-#define NUM_HAIR_OBJECT_BINS 32
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderHair
- {
- /*! settings for builder */
- struct Settings
- {
- /*! default settings: binary tree, depth limit 32, leaves of 1 to 7 primitives, finished_range_threshold initialized from inf */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf; recurse() stops splitting ranges of at most this many primitives
- size_t maxLeafSize; //!< maximum size of a leaf; createLargeLeaf() only emits a single leaf for ranges up to this size
- size_t finished_range_threshold; //!< a child range of at most this size whose parent range is larger is reported through reportFinishedRange once its subtree is built
- };
-
- template<typename NodeRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeFunc,
- typename SetAABBNodeFunc,
- typename CreateOBBNodeFunc,
- typename SetOBBNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor,
- typename ReportFinishedRangeFunc>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
- friend struct BVHBuilderHair;
-
- typedef FastAllocator::CachedAllocator Allocator;
- typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
- typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
- typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
-
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
- static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
-
- static const size_t travCostAligned = 1; //!< weight of the node's half area in the SAH of an aligned split (see split)
- static const size_t travCostUnaligned = 5; //!< weight of the node's half area in the SAH of an unaligned or strand split (see split)
- static const size_t intCost = 6; //!< weight applied to the leaf block count and to every splitSAH() value (see split)
-
- BuilderT (Scene* scene, // NOTE: settings is copied into cfg, while all callbacks are kept as const references and must outlive the builder
- PrimRef* prims,
- const CreateAllocFunc& createAlloc,
- const CreateAABBNodeFunc& createAABBNode,
- const SetAABBNodeFunc& setAABBNode,
- const CreateOBBNodeFunc& createOBBNode,
- const SetOBBNodeFunc& setOBBNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const ReportFinishedRangeFunc& reportFinishedRange,
- const Settings settings)
-
- : cfg(settings),
- prims(prims),
- createAlloc(createAlloc),
- createAABBNode(createAABBNode),
- setAABBNode(setAABBNode),
- createOBBNode(createOBBNode),
- setOBBNode(setOBBNode),
- createLeaf(createLeaf),
- progressMonitor(progressMonitor),
- reportFinishedRange(reportFinishedRange),
- alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {} // all three heuristics work on the same prims array
-
- /*! checks if all primitives of the range carry the same geomID; an empty range counts as same geometry */
- __forceinline bool sameGeometry(const PrimInfoRange& range)
- {
- if (range.size() == 0) return true;
- unsigned int firstGeomID = prims[range.begin()].geomID();
- for (size_t i=range.begin()+1; i<range.end(); i++) {
- if (prims[i].geomID() != firstGeomID){
- return false;
- }
- }
- return true;
- }
-
- /*! creates a large leaf that could be larger than supported by the BVH; ranges that are too big or mix geometries are wrapped in AABB nodes built recursively */
- NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives, but only if they all belong to one geometry */
- if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
- return createLeaf(prims,pinfo,alloc);
-
- /* fill all children by always splitting the one with the most primitives */
- PrimInfoRange children[MAX_BRANCHING_FACTOR];
- unsigned numChildren = 1;
- children[0] = pinfo;
-
- do {
-
- /* find splittable child with the largest number of primitives */
- int bestChild = -1;
- size_t bestSize = 0;
- for (unsigned i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
- continue;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child: by geometry if it mixes geometries, with the fallback split otherwise */
- __aligned(64) PrimInfoRange left, right;
- if (!sameGeometry(children[bestChild])) {
- alignedHeuristic.splitByGeometry(children[bestChild],left,right);
- } else {
- alignedHeuristic.splitFallback(children[bestChild],left,right);
- }
-
- /* move the last child into the slot of the best child, then store left and right at the end */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- /* create node */
- auto node = createAABBNode(alloc);
-
- for (size_t i=0; i<numChildren; i++) {
- const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
- setAABBNode(node,i,child,children[i].geomBounds);
- }
-
- return node;
- }
-
- /*! performs split of pinfo into linfo and rinfo using the candidate with the lowest SAH; aligned is in/out: aligned binning is only tried while it is true, and it is cleared when an unaligned or strand split is chosen */
- __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
- {
- /* variable to track the SAH of the best splitting approach */
- float bestSAH = inf;
- const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
- const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
-
- /* try standard binning in aligned space (skipped once aligned has been cleared) */
- float alignedObjectSAH = inf;
- HeuristicBinningSAH::Split alignedObjectSplit;
- if (aligned) {
- alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
- alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
- bestSAH = min(alignedObjectSAH,bestSAH);
- }
-
- /* try standard binning in unaligned space, only if the best SAH so far is above 0.7x the leaf SAH */
- UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
- LinearSpace3fa uspace;
- float unalignedObjectSAH = inf;
- if (bestSAH > 0.7f*leafSAH) {
- uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
- unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
- unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
- bestSAH = min(unalignedObjectSAH,bestSAH);
- }
-
- /* try splitting into two strands, under the same 0.7x test and only for at most 256 primitives */
- HeuristicStrandSplitSAH::Split strandSplit;
- float strandSAH = inf;
- if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
- strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
- strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
- bestSAH = min(strandSAH,bestSAH);
- }
-
- /* fallback if SAH heuristics failed (best SAH is not finite) */
- if (unlikely(!std::isfinite(bestSAH)))
- {
- alignedHeuristic.deterministic_order(pinfo);
- alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
- }
-
- /* perform aligned split if this is best */
- else if (bestSAH == alignedObjectSAH) {
- alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
- }
-
- /* perform unaligned split if this is best */
- else if (bestSAH == unalignedObjectSAH) {
- unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
- aligned = false;
- }
-
- /* perform strand split if this is best */
- else if (bestSAH == strandSAH) {
- strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
- aligned = false;
- }
-
- /* can never happen: a finite bestSAH was taken from one of the three candidates above */
- else
- assert(false);
- }
-
- /*! recursive build: returns the node built for pinfo; a null alloc is replaced by createAlloc(); alloc_barrier requests a reportFinishedRange(pinfo) call after the children are built */
- NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call progress monitor for toplevel ranges that are small enough to be built without spawning tasks */
- if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
- progressMonitor(pinfo.size());
-
- PrimInfoRange children[MAX_BRANCHING_FACTOR];
-
- /* create leaf node when close to the depth limit or when the range is small enough */
- if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
- alignedHeuristic.deterministic_order(pinfo);
- return createLargeLeaf(depth,pinfo,alloc);
- }
-
- /* fill all children by always splitting the one with the largest surface area */
- size_t numChildren = 1;
- children[0] = pinfo;
- bool aligned = true;
-
- do {
-
- /* find best child with largest bounding box area */
- ssize_t bestChild = -1;
- float bestArea = neg_inf;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.minLeafSize)
- continue;
-
- /* remember child with largest area */
- if (area(children[i].geomBounds) > bestArea) {
- bestArea = area(children[i].geomBounds);
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child; split() may clear aligned, which then stays cleared for this node */
- PrimInfoRange left, right;
- split(children[bestChild],left,right,aligned);
-
- /* move the last child into the slot of the best child, then store left and right at the end */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- NodeRef node;
-
- /* create aligned node */
- if (aligned)
- {
- node = createAABBNode(alloc);
-
- /* spawn tasks or ... */
- if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else {
- for (size_t i=0; i<numChildren; i++) {
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
- }
- }
- }
-
- /* create unaligned node; child bounds are recomputed in a space obtained per child from the unaligned heuristic */
- else
- {
- node = createOBBNode(alloc);
-
- /* spawn tasks or ... */
- if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
- const OBBox3fa obounds(space,sinfo.geomBounds);
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else
- {
- for (size_t i=0; i<numChildren; i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
- const OBBox3fa obounds(space,sinfo.geomBounds);
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
- }
- }
- }
-
- /* reports a finished range of primrefs */
- if (unlikely(alloc_barrier))
- reportFinishedRange(pinfo);
-
- return node;
- }
-
- private:
- Settings cfg;
- PrimRef* prims;
- const CreateAllocFunc& createAlloc;
- const CreateAABBNodeFunc& createAABBNode;
- const SetAABBNodeFunc& setAABBNode;
- const CreateOBBNodeFunc& createOBBNode;
- const SetOBBNodeFunc& setOBBNode;
- const CreateLeafFunc& createLeaf;
- const ProgressMonitor& progressMonitor;
- const ReportFinishedRangeFunc& reportFinishedRange;
-
- private:
- HeuristicBinningSAH alignedHeuristic;
- UnalignedHeuristicBinningSAH unalignedHeuristic;
- HeuristicStrandSplitSAH strandHeuristic;
- };
-
- template<typename NodeRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeFunc,
- typename SetAABBNodeFunc,
- typename CreateOBBNodeFunc,
- typename SetOBBNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor,
- typename ReportFinishedRangeFunc>
-
- static NodeRef build (const CreateAllocFunc& createAlloc, // entry point: constructs a BuilderT from the callbacks and returns the root produced by its recurse()
- const CreateAABBNodeFunc& createAABBNode,
- const SetAABBNodeFunc& setAABBNode,
- const CreateOBBNodeFunc& createOBBNode,
- const SetOBBNodeFunc& setOBBNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const ReportFinishedRangeFunc& reportFinishedRange,
- Scene* scene,
- PrimRef* prims,
- const PrimInfo& pinfo,
- const Settings settings)
- {
- typedef BuilderT<NodeRef,
- CreateAllocFunc,
- CreateAABBNodeFunc,SetAABBNodeFunc,
- CreateOBBNodeFunc,SetOBBNodeFunc,
- CreateLeafFunc,ProgressMonitor,
- ReportFinishedRangeFunc> Builder;
-
- Builder builder(scene,prims,createAlloc,
- createAABBNode,setAABBNode,
- createOBBNode,setOBBNode,
- createLeaf,progressMonitor,reportFinishedRange,settings);
-
- NodeRef root = builder.recurse(1,pinfo,nullptr,true,false); // depth 1, no allocator yet (recurse calls createAlloc), toplevel, no alloc barrier
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h
deleted file mode 100644
index 92be2f7e65..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h
+++ /dev/null
@@ -1,501 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/builder.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderMorton
- {
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth
-
- /*! settings for morton builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {}
-
- /*! initialize settings from API settings */
- Settings (const RTCBuildArguments& settings)
- : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024)
- {
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
-
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold)
- : branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold)
- {
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
- };
-
- /*! Build primitive consisting of morton code and primitive ID. */
- struct __aligned(8) BuildPrim
- {
- union {
- struct {
- unsigned int code; //!< morton code
- unsigned int index; //!< i'th primitive
- };
- uint64_t t;
- };
-
- /*! interface for radix sort */
- __forceinline operator unsigned() const { return code; }
-
- /*! interface for standard sort */
- __forceinline bool operator<(const BuildPrim &m) const { return code < m.code; }
- };
-
- /*! maps bounding box to morton code */
- struct MortonCodeMapping
- {
- static const size_t LATTICE_BITS_PER_DIM = 10;
- static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM;
-
- vfloat4 base;
- vfloat4 scale;
-
- __forceinline MortonCodeMapping(const BBox3fa& bounds)
- {
- base = (vfloat4)bounds.lower;
- const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower;
- scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f));
- }
-
- __forceinline const vint4 bin (const BBox3fa& box) const
- {
- const vfloat4 lower = (vfloat4)box.lower;
- const vfloat4 upper = (vfloat4)box.upper;
- const vfloat4 centroid = lower+upper;
- return vint4((centroid-base)*scale);
- }
-
- __forceinline unsigned int code (const BBox3fa& box) const
- {
- const vint4 binID = bin(box);
- const unsigned int x = extract<0>(binID);
- const unsigned int y = extract<1>(binID);
- const unsigned int z = extract<2>(binID);
- const unsigned int xyz = bitInterleave(x,y,z);
- return xyz;
- }
- };
-
-#if defined (__AVX2__)
-
- /*! for AVX2 there is a fast scalar bitInterleave */
- struct MortonCodeGenerator
- {
- __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
- : mapping(mapping), dest(dest) {}
-
- __forceinline void operator() (const BBox3fa& b, const unsigned index)
- {
- dest->index = index;
- dest->code = mapping.code(b);
- dest++;
- }
-
- public:
- const MortonCodeMapping mapping;
- BuildPrim* dest;
- size_t currentID;
- };
-
-#else
-
- /*! before AVX2 is it better to use the SSE version of bitInterleave */
- struct MortonCodeGenerator
- {
- __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
- : mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {}
-
- __forceinline ~MortonCodeGenerator()
- {
- if (slots != 0)
- {
- const vint4 code = bitInterleave(ax,ay,az);
- for (size_t i=0; i<slots; i++) {
- dest[currentID-slots+i].index = ai[i];
- dest[currentID-slots+i].code = code[i];
- }
- }
- }
-
- __forceinline void operator() (const BBox3fa& b, const unsigned index)
- {
- const vint4 binID = mapping.bin(b);
- ax[slots] = extract<0>(binID);
- ay[slots] = extract<1>(binID);
- az[slots] = extract<2>(binID);
- ai[slots] = index;
- slots++;
- currentID++;
-
- if (slots == 4)
- {
- const vint4 code = bitInterleave(ax,ay,az);
- vint4::storeu(&dest[currentID-4],unpacklo(code,ai));
- vint4::storeu(&dest[currentID-2],unpackhi(code,ai));
- slots = 0;
- }
- }
-
- public:
- const MortonCodeMapping mapping;
- BuildPrim* dest;
- size_t currentID;
- size_t slots;
- vint4 ax, ay, az, ai;
- };
-
-#endif
-
- template<
- typename ReductionTy,
- typename Allocator,
- typename CreateAllocator,
- typename CreateNodeFunc,
- typename SetNodeBoundsFunc,
- typename CreateLeafFunc,
- typename CalculateBounds,
- typename ProgressMonitor>
-
- class BuilderT : private Settings
- {
- ALIGNED_CLASS_(16);
-
- public:
-
- BuilderT (CreateAllocator& createAllocator,
- CreateNodeFunc& createNode,
- SetNodeBoundsFunc& setBounds,
- CreateLeafFunc& createLeaf,
- CalculateBounds& calculateBounds,
- ProgressMonitor& progressMonitor,
- const Settings& settings)
-
- : Settings(settings),
- createAllocator(createAllocator),
- createNode(createNode),
- setBounds(setBounds),
- createLeaf(createLeaf),
- calculateBounds(calculateBounds),
- progressMonitor(progressMonitor),
- morton(nullptr) {}
-
- ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (depth > maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives */
- if (current.size() <= maxLeafSize)
- return createLeaf(current,alloc);
-
- /* fill all children by always splitting the largest one */
- range<unsigned> children[MAX_BRANCHING_FACTOR];
- size_t numChildren = 1;
- children[0] = current;
-
- do {
-
- /* find best child with largest number of primitives */
- size_t bestChild = -1;
- size_t bestSize = 0;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= maxLeafSize)
- continue;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == size_t(-1)) break;
-
- /*! split best child into left and right child */
- auto split = children[bestChild].split();
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = split.first;
- children[numChildren+0] = split.second;
- numChildren++;
-
- } while (numChildren < branchingFactor);
-
- /* create node */
- auto node = createNode(alloc,numChildren);
-
- /* recurse into each child */
- ReductionTy bounds[MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<numChildren; i++)
- bounds[i] = createLargeLeaf(depth+1,children[i],alloc);
-
- return setBounds(node,bounds,numChildren);
- }
-
- /*! recreates morton codes when reaching a region where all codes are identical */
- __noinline void recreateMortonCodes(const range<unsigned>& current) const
- {
- /* fast path for small ranges */
- if (likely(current.size() < 1024))
- {
- /*! recalculate centroid bounds */
- BBox3fa centBounds(empty);
- for (size_t i=current.begin(); i<current.end(); i++)
- centBounds.extend(center2(calculateBounds(morton[i])));
-
- /* recalculate morton codes */
- MortonCodeMapping mapping(centBounds);
- for (size_t i=current.begin(); i<current.end(); i++)
- morton[i].code = mapping.code(calculateBounds(morton[i]));
-
- /* sort morton codes */
- std::sort(morton+current.begin(),morton+current.end());
- }
- else
- {
- /*! recalculate centroid bounds */
- auto calculateCentBounds = [&] ( const range<unsigned>& r ) {
- BBox3fa centBounds = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- centBounds.extend(center2(calculateBounds(morton[i])));
- return centBounds;
- };
- const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024),
- BBox3fa(empty), calculateCentBounds, BBox3fa::merge);
-
- /* recalculate morton codes */
- MortonCodeMapping mapping(centBounds);
- parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- morton[i].code = mapping.code(calculateBounds(morton[i]));
- }
- });
-
- /*! sort morton codes */
-#if defined(TASKING_TBB)
- tbb::parallel_sort(morton+current.begin(),morton+current.end());
-#else
- radixsort32(morton+current.begin(),current.size());
-#endif
- }
- }
-
- __forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const
- {
- const unsigned int code_start = morton[current.begin()].code;
- const unsigned int code_end = morton[current.end()-1].code;
- unsigned int bitpos = lzcnt(code_start^code_end);
-
- /* if all items mapped to same morton code, then re-create new morton codes for the items */
- if (unlikely(bitpos == 32))
- {
- recreateMortonCodes(current);
- const unsigned int code_start = morton[current.begin()].code;
- const unsigned int code_end = morton[current.end()-1].code;
- bitpos = lzcnt(code_start^code_end);
-
- /* if the morton code is still the same, goto fall back split */
- if (unlikely(bitpos == 32)) {
- current.split(left,right);
- return;
- }
- }
-
- /* split the items at the topmost different morton code bit */
- const unsigned int bitpos_diff = 31-bitpos;
- const unsigned int bitmask = 1 << bitpos_diff;
-
- /* find location where bit differs using binary search */
- unsigned begin = current.begin();
- unsigned end = current.end();
- while (begin + 1 != end) {
- const unsigned mid = (begin+end)/2;
- const unsigned bit = morton[mid].code & bitmask;
- if (bit == 0) begin = mid; else end = mid;
- }
- unsigned center = end;
-#if defined(DEBUG)
- for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0);
- for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask);
-#endif
-
- left = make_range(current.begin(),center);
- right = make_range(center,current.end());
- }
-
- ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAllocator();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= singleThreadThreshold)
- progressMonitor(current.size());
-
- /* create leaf node */
- if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize))
- return createLargeLeaf(depth,current,alloc);
-
- /* fill all children by always splitting the one with the largest surface area */
- range<unsigned> children[MAX_BRANCHING_FACTOR];
- split(current,children[0],children[1]);
- size_t numChildren = 2;
-
- while (numChildren < branchingFactor)
- {
- /* find best child with largest number of primitives */
- int bestChild = -1;
- unsigned bestItems = 0;
- for (unsigned int i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= minLeafSize)
- continue;
-
- /* remember child with largest area */
- if (children[i].size() > bestItems) {
- bestItems = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- range<unsigned> left, right;
- split(children[bestChild],left,right);
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
- }
-
- /* create leaf node if no split is possible */
- if (unlikely(numChildren == 1))
- return createLeaf(current,alloc);
-
- /* allocate node */
- auto node = createNode(alloc,numChildren);
-
- /* process top parts of tree parallel */
- ReductionTy bounds[MAX_BRANCHING_FACTOR];
- if (current.size() > singleThreadThreshold)
- {
- /*! parallel_for is faster than spawing sub-tasks */
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- bounds[i] = recurse(depth+1,children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
-
- /* finish tree sequentially */
- else
- {
- for (size_t i=0; i<numChildren; i++)
- bounds[i] = recurse(depth+1,children[i],alloc,false);
- }
-
- return setBounds(node,bounds,numChildren);
- }
-
- /* build function */
- ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives)
- {
- /* sort morton codes */
- morton = src;
- radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold);
-
- /* build BVH */
- const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
-
- public:
- CreateAllocator& createAllocator;
- CreateNodeFunc& createNode;
- SetNodeBoundsFunc& setBounds;
- CreateLeafFunc& createLeaf;
- CalculateBounds& calculateBounds;
- ProgressMonitor& progressMonitor;
-
- public:
- BuildPrim* morton;
- };
-
-
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename SetBoundsFunc,
- typename CreateLeafFunc,
- typename CalculateBoundsFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAllocator,
- CreateNodeFunc createNode,
- SetBoundsFunc setBounds,
- CreateLeafFunc createLeaf,
- CalculateBoundsFunc calculateBounds,
- ProgressMonitor progressMonitor,
- BuildPrim* src,
- BuildPrim* tmp,
- size_t numPrimitives,
- const Settings& settings)
- {
- typedef BuilderT<
- ReductionTy,
- decltype(createAllocator()),
- CreateAllocFunc,
- CreateNodeFunc,
- SetBoundsFunc,
- CreateLeafFunc,
- CalculateBoundsFunc,
- ProgressMonitor> Builder;
-
- Builder builder(createAllocator,
- createNode,
- setBounds,
- createLeaf,
- calculateBounds,
- progressMonitor,
- settings);
-
- return builder.build(src,tmp,numPrimitives);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
deleted file mode 100644
index 4c138dacdb..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
+++ /dev/null
@@ -1,692 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#define MBLUR_NUM_TEMPORAL_BINS 2
-#define MBLUR_NUM_OBJECT_BINS 32
-
-#include "../bvh/bvh.h"
-#include "../common/primref_mb.h"
-#include "heuristic_binning_array_aligned.h"
-#include "heuristic_timesplit_array.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename T>
- struct SharedVector
- {
- __forceinline SharedVector() {}
-
- __forceinline SharedVector(T* ptr, size_t refCount = 1)
- : prims(ptr), refCount(refCount) {}
-
- __forceinline void incRef() {
- refCount++;
- }
-
- __forceinline void decRef()
- {
- if (--refCount == 0)
- delete prims;
- }
-
- T* prims;
- size_t refCount;
- };
-
- template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
- struct LocalChildListT
- {
- typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
-
- __forceinline LocalChildListT (const BuildRecord& record)
- : numChildren(1), numSharedPrimVecs(1)
- {
- /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
- children[0] = record;
- primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
- }
-
- __forceinline ~LocalChildListT()
- {
- for (size_t i = 0; i < numChildren; i++)
- primvecs[i]->decRef();
- }
-
- __forceinline BuildRecord& operator[] ( const size_t i ) {
- return children[i];
- }
-
- __forceinline size_t size() const {
- return numChildren;
- }
-
- __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
- {
- SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
- if (lrecord.prims.prims == bsharedPrimVec->prims) {
- primvecs[bestChild] = bsharedPrimVec;
- bsharedPrimVec->incRef();
- }
- else {
- primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
- }
-
- if (rrecord.prims.prims == bsharedPrimVec->prims) {
- primvecs[numChildren] = bsharedPrimVec;
- bsharedPrimVec->incRef();
- }
- else {
- primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
- }
- bsharedPrimVec->decRef();
- new_vector.release();
-
- children[bestChild] = lrecord;
- children[numChildren] = rrecord;
- numChildren++;
- }
-
- public:
- array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
- array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
- size_t numChildren;
-
- array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
- size_t numSharedPrimVecs;
- };
-
- template<typename Mesh>
- struct RecalculatePrimRef
- {
- Scene* scene;
-
- __forceinline RecalculatePrimRef (Scene* scene)
- : scene(scene) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Mesh* mesh = scene->get<Mesh>(geomID);
- const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- // __noinline is workaround for ICC16 bug under MacOSX
- __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Mesh* mesh = scene->get<Mesh>(geomID);
- const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
- }
-
- // __noinline is workaround for ICC16 bug under MacOSX
- __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
- return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
- }
- };
-
- struct VirtualRecalculatePrimRef
- {
- Scene* scene;
-
- __forceinline VirtualRecalculatePrimRef (Scene* scene)
- : scene(scene) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Geometry* mesh = scene->get(geomID);
- const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Geometry* mesh = scene->get(geomID);
- const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
- return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
- }
- };
-
- struct BVHBuilderMSMBlur
- {
- /*! settings for msmblur builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
- travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
- singleThreadThreshold(1024) {}
-
-
- Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
- : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
- travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
- {
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- float travCost; //!< estimated cost of one traversal step
- float intCost; //!< estimated cost of one primitive intersection
- bool singleLeafTimeSegment; //!< split time to single time range
- size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
- };
-
- struct BuildRecord
- {
- public:
- __forceinline BuildRecord () {}
-
- __forceinline BuildRecord (size_t depth)
- : depth(depth) {}
-
- __forceinline BuildRecord (const SetMB& prims, size_t depth)
- : depth(depth), prims(prims) {}
-
- __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
- return a.prims.size() < b.prims.size();
- }
-
- __forceinline size_t size() const {
- return prims.size();
- }
-
- public:
- size_t depth; //!< Depth of the root of this subtree.
- SetMB prims; //!< The list of primitives.
- };
-
- struct BuildRecordSplit : public BuildRecord
- {
- __forceinline BuildRecordSplit () {}
-
- __forceinline BuildRecordSplit (size_t depth)
- : BuildRecord(depth) {}
-
- __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
- : BuildRecord(record), split(split) {}
-
- BinSplit<MBLUR_NUM_OBJECT_BINS> split;
- };
-
- template<
- typename NodeRef,
- typename RecalculatePrimRef,
- typename Allocator,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename SetNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
- static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
-
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
- typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
- typedef mvector<PrimRefMB>* PrimRefVector;
- typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
- typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
- typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
-
- public:
-
- BuilderT (MemoryMonitorInterface* device,
- const RecalculatePrimRef recalculatePrimRef,
- const CreateAllocFunc createAlloc,
- const CreateNodeFunc createNode,
- const SetNodeFunc setNode,
- const CreateLeafFunc createLeaf,
- const ProgressMonitor progressMonitor,
- const Settings& settings)
- : cfg(settings),
- heuristicObjectSplit(),
- heuristicTemporalSplit(device, recalculatePrimRef),
- recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
- progressMonitor(progressMonitor)
- {
- if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
- throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
- }
-
- /*! finds the best split */
- const Split find(const SetMB& set)
- {
- /* first try standard object split */
- const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
- const float object_split_sah = object_split.splitSAH();
-
- /* test temporal splits only when object split was bad */
- const float leaf_sah = set.leafSAH(cfg.logBlockSize);
- if (object_split_sah < 0.50f*leaf_sah)
- return object_split;
-
- /* do temporal splits only if the the time range is big enough */
- if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
- {
- const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
- const float temporal_split_sah = temporal_split.splitSAH();
-
- /* take temporal split if it improved SAH */
- if (temporal_split_sah < object_split_sah)
- return temporal_split;
- }
-
- return object_split;
- }
-
- /*! array partitioning */
- __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- /* perform object split */
- if (likely(split.data == Split::SPLIT_OBJECT)) {
- heuristicObjectSplit.split(split,set,lset,rset);
- }
- /* perform temporal split */
- else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
- return heuristicTemporalSplit.split(split,set,lset,rset);
- }
- /* perform fallback split */
- else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
- set.deterministic_order();
- splitFallback(set,lset,rset);
- }
- /* split by geometry */
- else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
- set.deterministic_order();
- splitByGeometry(set,lset,rset);
- }
- else
- assert(false);
-
- return std::unique_ptr<mvector<PrimRefMB>>();
- }
-
- /*! finds the best fallback split */
- __noinline Split findFallback(const SetMB& set)
- {
- /* split if primitives are not from same geometry */
- if (!sameGeometry(set))
- return Split(0.0f,Split::SPLIT_GEOMID);
-
- /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
- if (cfg.singleLeafTimeSegment)
- {
- /* test if one primitive has more than one time segment in time range, if so split time */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const PrimRefMB& prim = (*set.prims)[i];
- const range<int> itime_range = prim.timeSegmentRange(set.time_range);
- const int localTimeSegments = itime_range.size();
- assert(localTimeSegments > 0);
- if (localTimeSegments > 1) {
- const int icenter = (itime_range.begin() + itime_range.end())/2;
- const float splitTime = prim.timeStep(icenter);
- return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
- }
- }
- }
-
- /* otherwise return fallback split */
- return Split(0.0f,Split::SPLIT_FALLBACK);
- }
-
- /*! performs fallback split */
- void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- mvector<PrimRefMB>& prims = *set.prims;
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfoMB linfo = empty;
- for (size_t i=begin; i<center; i++)
- linfo.add_primref(prims[i]);
-
- PrimInfoMB rinfo = empty;
- for (size_t i=center; i<end; i++)
- rinfo.add_primref(prims[i]);
-
- new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- /*! checks if all primitives are from the same geometry */
- __forceinline bool sameGeometry(const SetMB& set)
- {
- if (set.size() == 0) return true;
- mvector<PrimRefMB>& prims = *set.prims;
- const size_t begin = set.begin();
- const size_t end = set.end();
- unsigned int firstGeomID = prims[begin].geomID();
- for (size_t i=begin+1; i<end; i++) {
- if (prims[i].geomID() != firstGeomID){
- return false;
- }
- }
- return true;
- }
-
- /* split by geometry ID */
- void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- assert(set.size() > 1);
-
- mvector<PrimRefMB>& prims = *set.prims;
- const size_t begin = set.begin();
- const size_t end = set.end();
-
- PrimInfoMB left(empty);
- PrimInfoMB right(empty);
- unsigned int geomID = prims[begin].geomID();
- size_t center = serial_partitioning(prims.data(),begin,end,left,right,
- [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
- [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
-
- new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (in.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* replace already found split by fallback split */
- const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));
-
- /* special case when directly creating leaf without any splits that could shrink time_range */
- bool force_split = false;
- if (current.depth == 1 && current.size() > 0)
- {
- BBox1f c = empty;
- BBox1f p = current.prims.time_range;
- for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
- mvector<PrimRefMB>& prims = *current.prims.prims;
- c.extend(prims[i].time_range);
- }
-
- force_split = c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create leaf for few primitives */
- if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
- return createLeaf(current,alloc);
-
- /* fill all children by always splitting the largest one */
- bool hasTimeSplits = false;
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildListSplit children(current);
-
- do {
- /* find best child with largest bounding box area */
- size_t bestChild = -1;
- size_t bestSize = 0;
- for (size_t i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
- continue;
-
- force_split = false;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /* perform best found split */
- BuildRecordSplit& brecord = children[bestChild];
- BuildRecordSplit lrecord(current.depth+1);
- BuildRecordSplit rrecord(current.depth+1);
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
- hasTimeSplits |= new_vector != nullptr;
-
- /* find new splits */
- lrecord.split = findFallback(lrecord.prims);
- rrecord.split = findFallback(rrecord.prims);
- children.split(bestChild,lrecord,rrecord,std::move(new_vector));
-
- } while (children.size() < cfg.branchingFactor);
-
- /* detect time_ranges that have shrunken */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = in.prims.time_range;
- hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create node */
- auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);
-
- /* recurse into each child and perform reduction */
- LBBox3fa gbounds = empty;
- for (size_t i=0; i<children.size(); i++) {
- values[i] = createLargeLeaf(children[i],alloc);
- gbounds.extend(values[i].lbounds);
- }
-
- setNode(current,children.children.data(),node,values,children.numChildren);
-
- /* calculate geometry bounds of this node */
- if (hasTimeSplits)
- return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
- else
- return NodeRecordMB4D(node,gbounds,current.prims.time_range);
- }
-
- const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
- { /*! Recursive build step: returns the node record of the subtree that holds the primitives of `current`.
-    *  Either a (large) leaf is created, or the set is split into up to cfg.branchingFactor children
-    *  that are built recursively and attached to a newly created inner node.
-    *  \param current  build record (primitive set and depth) of the subtree to build
-    *  \param alloc    allocator to use; if it evaluates to false a new one is requested from createAlloc()
-    *  \param toplevel if true, progress is reported for sets of at most cfg.singleThreadThreshold primitives */
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= cfg.singleThreadThreshold)
- progressMonitor(current.size());
-
- /*! find best split */
- const Split csplit = find(current.prims);
-
- /*! compute leaf and split cost */
- const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
- const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
- assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
-
- /*! create a leaf node when a size or depth threshold is reached, or when the set fits into a leaf and the leaf SAH is not worse than the split SAH */
- if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
- current.prims.deterministic_order();
- return createLargeLeaf(current,alloc);
- }
-
- /*! perform initial split */
- SetMB lprims,rprims;
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
- bool hasTimeSplits = new_vector != nullptr; // a non-null vector returned by split() is counted as a time split
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildList children(current);
- {
- BuildRecord lrecord(lprims,current.depth+1);
- BuildRecord rrecord(rprims,current.depth+1);
- children.split(0,lrecord,rrecord,std::move(new_vector));
- }
-
- /*! split until node is full or no child can be split any further */
- while (children.size() < cfg.branchingFactor)
- {
- /*! find best child to split: the one with the largest expectedApproxHalfArea among children with more than cfg.minLeafSize primitives */
- float bestArea = neg_inf;
- ssize_t bestChild = -1;
- for (size_t i=0; i<children.size(); i++)
- {
- if (children[i].size() <= cfg.minLeafSize) continue;
- if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
- bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
- }
- }
- if (bestChild == -1) break; // every child is at or below cfg.minLeafSize
-
- /* perform split */
- BuildRecord& brecord = children[bestChild];
- BuildRecord lrecord(current.depth+1);
- BuildRecord rrecord(current.depth+1);
- Split csplit = find(brecord.prims); // shadows the outer csplit, which belongs to the initial split
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
- hasTimeSplits |= new_vector != nullptr;
- children.split(bestChild,lrecord,rrecord,std::move(new_vector));
- }
-
- /* detect time_ranges that have shrunken: a child whose time range is narrower than the parent's also counts as a time split */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* sort buildrecords for simpler shadow ray traversal */
- //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (needs to be verified) !!
-
- /*! create an inner node */
- auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
- LBBox3fa gbounds = empty;
-
- /* spawn tasks */
- if (unlikely(current.size() > cfg.singleThreadThreshold))
- {
- /*! parallel_for is faster than spawning sub-tasks */
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true); // nullptr: the callee requests its own allocator through createAlloc()
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- /*! merge bounding boxes (done after the parallel loop, gbounds is not written inside it) */
- for (size_t i=0; i<children.size(); i++)
- gbounds.extend(values[i].lbounds);
- }
- /* recurse into each child */
- else
- {
- //for (size_t i=0; i<children.size(); i++)
- for (ssize_t i=children.size()-1; i>=0; i--) { // children are visited in reverse index order, reusing the caller's allocator
- values[i] = recurse(children[i],alloc,false);
- gbounds.extend(values[i].lbounds);
- }
- }
-
- setNode(current,children.children.data(),node,values,children.numChildren);
-
- /* calculate geometry bounds of this node: recomputed from the primitives when a time split occurred, merged child bounds otherwise */
- if (unlikely(hasTimeSplits))
- return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
- else
- return NodeRecordMB4D(node,gbounds,current.prims.time_range);
- }
-
- /*! Builder entry function: wraps the primitives described by pinfo into the
-  *  root build record (depth 1) and runs the recursive build on it.
-  *  \return the node record of the root */
- __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
- {
-   const BuildRecord root(SetMB(pinfo,&prims),1);
-   NodeRecordMB4D result = recurse(root,nullptr,true);
-   _mm_mfence(); // to allow non-temporal stores during build
-   return result;
- }
-
- private:
- Settings cfg;
- HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
- HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
- const RecalculatePrimRef recalculatePrimRef;
- const CreateAllocFunc createAlloc;
- const CreateNodeFunc createNode;
- const SetNodeFunc setNode;
- const CreateLeafFunc createLeaf;
- const ProgressMonitor progressMonitor;
- };
-
- /*! Convenience entry point: instantiates a BuilderT for the given callback
-  *  types and runs it on the primitive array.
-  *  \return the node record returned by the builder for the root */
- template<typename NodeRef,
-          typename RecalculatePrimRef,
-          typename CreateAllocFunc,
-          typename CreateNodeFunc,
-          typename SetNodeFunc,
-          typename CreateLeafFunc,
-          typename ProgressMonitorFunc>
-
- static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
-                                               const PrimInfoMB& pinfo,
-                                               MemoryMonitorInterface* device,
-                                               const RecalculatePrimRef recalculatePrimRef,
-                                               const CreateAllocFunc createAlloc,
-                                               const CreateNodeFunc createNode,
-                                               const SetNodeFunc setNode,
-                                               const CreateLeafFunc createLeaf,
-                                               const ProgressMonitorFunc progressMonitor,
-                                               const Settings& settings)
- {
-   /* the allocator type is whatever the allocator factory returns */
-   using AllocatorTy = decltype(createAlloc());
-   using Builder = BuilderT<NodeRef,RecalculatePrimRef,AllocatorTy,
-                            CreateAllocFunc,CreateNodeFunc,SetNodeFunc,
-                            CreateLeafFunc,ProgressMonitorFunc>;
-
-   Builder builder(device,recalculatePrimRef,createAlloc,createNode,setNode,createLeaf,progressMonitor,settings);
-   return builder(prims,pinfo);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h
deleted file mode 100644
index e477c313a3..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h
+++ /dev/null
@@ -1,526 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../geometry/primitive.h"
-#include "../builders/bvh_builder_msmblur.h"
-#include "../builders/heuristic_binning_array_aligned.h"
-#include "../builders/heuristic_binning_array_unaligned.h"
-#include "../builders/heuristic_timesplit_array.h"
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderHairMSMBlur
- {
- /*! Settings for the msmblur hair builder. */
- struct Settings
- {
-   /*! Default settings: binary tree, depth limit 32, SAH block size 2^0,
-    *  leaves of 1 to 8 primitives. */
-   Settings ()
-     : branchingFactor(2),
-       maxDepth(32),
-       logBlockSize(0),
-       minLeafSize(1),
-       maxLeafSize(8)
-   {
-   }
-
- public:
-   size_t branchingFactor; //!< branching factor of BVH to build
-   size_t maxDepth;        //!< maximum depth of BVH to build
-   size_t logBlockSize;    //!< log2 of blocksize for SAH heuristic
-   size_t minLeafSize;     //!< minimum size of a leaf
-   size_t maxLeafSize;     //!< maximum size of a leaf
- };
-
- /*! Recursion state of the builder: a primitive set plus the depth at which
-  *  the subtree built from it is rooted. */
- struct BuildRecord
- {
- public:
-   /*! leaves all members uninitialized */
-   __forceinline BuildRecord ()
-   {
-   }
-
-   /*! record at the given depth; the primitive set is default constructed */
-   __forceinline BuildRecord (size_t depth)
-     : depth(depth)
-   {
-   }
-
-   /*! record for the given primitive set at the given depth */
-   __forceinline BuildRecord (const SetMB& prims, size_t depth)
-     : depth(depth), prims(prims)
-   {
-   }
-
-   /*! number of primitives in the set */
-   __forceinline size_t size() const
-   {
-     return prims.size();
-   }
-
- public:
-   size_t depth; //!< depth of the root of this subtree
-   SetMB prims;  //!< the list of primitives
- };
-
- template<typename NodeRef,
- typename RecalculatePrimRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeMBFunc,
- typename SetAABBNodeMBFunc,
- typename CreateOBBNodeMBFunc,
- typename SetOBBNodeMBFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
-
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
- static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
-
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- typedef FastAllocator::CachedAllocator Allocator;
- typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
-
- typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
- typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
- typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
-
- public:
-
- BuilderT (Scene* scene, //!< scene handed to the unaligned heuristic; scene->device is dereferenced below, so it must not be null
- const RecalculatePrimRef& recalculatePrimRef, //!< functor passed to linearBounds()/primInfo() and to the temporal split heuristic
- const CreateAllocFunc& createAlloc, //!< called to obtain an allocator when recurse() is entered without one
- const CreateAABBNodeMBFunc& createAABBNodeMB, //!< creates the node used for aligned, time split and large leaf nodes
- const SetAABBNodeMBFunc& setAABBNodeMB, //!< attaches the built children to a node made by createAABBNodeMB
- const CreateOBBNodeMBFunc& createOBBNodeMB, //!< creates the node used after an unaligned split
- const SetOBBNodeMBFunc& setOBBNodeMB, //!< sets one child (reference, space, bounds, time range) of a node made by createOBBNodeMB
- const CreateLeafFunc& createLeaf, //!< creates a leaf from a primitive set
- const ProgressMonitor& progressMonitor, //!< called with the number of primitives of finished top level subtrees
- const Settings settings) //!< builder settings, copied into cfg
- /* the functors are kept in reference members of this class, so the arguments have to outlive the builder */
- : cfg(settings),
- scene(scene),
- recalculatePrimRef(recalculatePrimRef),
- createAlloc(createAlloc),
- createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
- createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
- createLeaf(createLeaf),
- progressMonitor(progressMonitor),
- unalignedHeuristic(scene),
- temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
-
- private:
-
- /*! Checks if all primitives of the set are from the same geometry.
-  *  \param set primitive set to inspect
-  *  \return true if every primitive has the geomID of the first one; an empty
-  *          set is reported as uniform without reading any primitive */
- __forceinline bool sameGeometry(const SetMB& set)
- {
-   /* prims[set.begin()] is not part of an empty set (and may lie past the end
-    * of the array), so it must not be read in that case */
-   if (set.size() == 0)
-     return true;
-
-   mvector<PrimRefMB>& prims = *set.prims;
-   const unsigned int firstGeomID = prims[set.begin()].geomID();
-   for (size_t i=set.begin()+1; i<set.end(); i++) {
-     if (prims[i].geomID() != firstGeomID)
-       return false;
-   }
-   return true;
- }
-
- /*! Fallback split for the case that the SAH heuristics produce no usable
-  *  split: cuts the primitive range of the set into two halves at its midpoint.
-  *  \param set  set to split
-  *  \param lset receives the primitives in front of the midpoint
-  *  \param rset receives the primitives from the midpoint on */
- void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
- {
-   mvector<PrimRefMB>& refs = *set.prims;
-   const range<size_t> lrange(set.begin(),(set.begin() + set.end())/2);
-   const range<size_t> rrange(lrange.end(),set.end());
-
-   /* accumulate the primitive info of each half */
-   PrimInfoMB linfo = empty;
-   for (size_t i=lrange.begin(); i<lrange.end(); i++)
-     linfo.add_primref(refs[i]);
-
-   PrimInfoMB rinfo = empty;
-   for (size_t i=rrange.begin(); i<rrange.end(); i++)
-     rinfo.add_primref(refs[i]);
-
-   /* construct the result sets in place, both keep the time range of the input set */
-   new (&lset) SetMB(linfo,set.prims,lrange,set.time_range);
-   new (&rset) SetMB(rinfo,set.prims,rrange,set.time_range);
- }
-
- /*! Splits the set by geometry: the primitives are partitioned with the
-  *  predicate "has the geomID of the first primitive"; the left part of the
-  *  partition becomes lset and the right part becomes rset. */
- void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
- {
-   assert(set.size() > 1);
-
-   /* the geometry of the first primitive selects the left side */
-   const unsigned int leftGeomID = (*set.prims)[set.begin()].geomID();
-   const auto isLeft = [&] ( const PrimRefMB& prim ) { return prim.geomID() == leftGeomID; };
-   const auto accumulate = [ ] ( PrimInfoMB& info, const PrimRefMB& ref ) { info.add_primref(ref); };
-
-   PrimInfoMB linfo(empty);
-   PrimInfoMB rinfo(empty);
-   const size_t first = set.begin();
-   const size_t last = set.end();
-   const size_t center = serial_partitioning(set.prims->data(),first,last,linfo,rinfo,isLeft,accumulate);
-
-   /* construct the result sets in place, both keep the time range of the input set */
-   new (&lset) SetMB(linfo,set.prims,range<size_t>(first,center),set.time_range);
-   new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,last ),set.time_range);
- }
-
- /*! creates a large leaf that could be larger than supported by the BVH
-  *  A set that fits into one leaf (at most cfg.maxLeafSize primitives, all from the same geometry) is
-  *  passed to createLeaf directly. Otherwise the set is split, by geometry or at its midpoint, into
-  *  up to cfg.branchingFactor children that are turned into large leaves recursively.
-  *  \param current build record of the primitives to store
-  *  \param alloc   allocator forwarded to createLeaf and createAABBNodeMB
-  *  \return node record with the created node, its bounds and the time range of `current` */
- NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (current.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* special case when directly creating leaf without any splits that could shrink time_range */
- bool force_split = false;
- if (current.depth == 1 && current.size() > 0)
- {
- BBox1f c = empty; // union of the time ranges of all primitives
- BBox1f p = current.prims.time_range;
- for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
- mvector<PrimRefMB>& prims = *current.prims.prims;
- c.extend(prims[i].time_range);
- }
-
- force_split = c.lower > p.lower || c.upper < p.upper; // the primitives cover less than the time range of the set
- }
-
- /* create leaf for few primitives */
- if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
- return createLeaf(current.prims,alloc);
-
- /* fill all children by always splitting the largest one */
- LocalChildList children(current);
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
-
- do {
-
- /* find the child with the most primitives among those that cannot be stored as a single leaf */
- int bestChild = -1;
- size_t bestSize = 0;
- for (unsigned i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
- continue;
-
- force_split = false; // a forced split is requested only once
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break; // all children can be stored as leaves
-
- /*! split best child into left and right child: by geometry if it mixes geometries, at its midpoint otherwise */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- if (!sameGeometry(children[bestChild].prims)) {
- splitByGeometry(children[bestChild].prims,left.prims,right.prims);
- } else {
- splitFallback(children[bestChild].prims,left.prims,right.prims);
- }
- children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>()); // neither split creates a new primitive vector
-
- } while (children.size() < cfg.branchingFactor);
-
-
- /* detect time_ranges that have shrunken */
- bool timesplit = false;
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- timesplit |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create node */
- NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
-
- LBBox3fa bounds = empty; // merged linear bounds of the children
- for (size_t i=0; i<children.size(); i++) {
- values[i] = createLargeLeaf(children[i],alloc);
- bounds.extend(values[i].lbounds);
- }
-
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
-
- if (timesplit) // the merged child bounds are replaced by bounds recomputed from the primitives
- bounds = current.prims.linearBounds(recalculatePrimRef);
-
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
-
- /*! performs split
-  *  Evaluates an aligned object split, optionally an unaligned object split and a temporal split,
-  *  and executes the one with the lowest SAH; if no heuristic yields a finite SAH the set is cut
-  *  at its midpoint by splitFallback.
-  *  \param current   record whose primitives get split
-  *  \param lrecord   receives the left primitive set
-  *  \param rrecord   receives the right primitive set
-  *  \param aligned   set to false when an unaligned split is performed, left untouched otherwise
-  *  \param timesplit set to true when a temporal split is performed, left untouched otherwise
-  *  \return the vector returned by temporalSplitHeuristic.split() for a temporal split, an empty pointer otherwise */
- std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
- {
- /* variable to track the SAH of the best splitting approach */
- float bestSAH = inf;
- const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
-
- /* perform standard binning in aligned space */
- HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
- float alignedObjectSAH = alignedObjectSplit.splitSAH();
- bestSAH = min(alignedObjectSAH,bestSAH);
-
- /* perform standard binning in unaligned space */
- UnalignedHeuristicBinning::Split unalignedObjectSplit;
- LinearSpace3fa uspace;
- float unalignedObjectSAH = inf;
- if (alignedObjectSAH > 0.7f*leafSAH) { // only tried when the aligned split SAH exceeds 70% of the leaf SAH
- uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
- const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
- unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
- unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
- bestSAH = min(unalignedObjectSAH,bestSAH);
- }
-
- /* do temporal splits only if previous approaches failed to produce good SAH and the time range is large enough */
- float temporal_split_sah = inf;
- typename HeuristicTemporal::Split temporal_split;
- if (bestSAH > 0.5f*leafSAH) { // best SAH so far still exceeds half of the leaf SAH
- if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) { // time range wider than 1.01/max_num_time_segments
- temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
- temporal_split_sah = temporal_split.splitSAH();
- bestSAH = min(temporal_split_sah,bestSAH);
- }
- }
-
- /* perform fallback split if SAH heuristics failed */
- if (unlikely(!std::isfinite(bestSAH))) {
- current.prims.deterministic_order();
- splitFallback(current.prims,lrecord.prims,rrecord.prims);
- }
- /* perform aligned split if this is best (exact comparison: bestSAH was assigned from one of the candidate values by min()) */
- else if (likely(bestSAH == alignedObjectSAH)) {
- alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
- }
- /* perform unaligned split if this is best */
- else if (likely(bestSAH == unalignedObjectSAH)) {
- unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
- aligned = false;
- }
- /* perform temporal split if this is best */
- else if (likely(bestSAH == temporal_split_sah)) {
- timesplit = true;
- return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
- }
- else
- assert(false); // bestSAH always equals one of the candidates above
-
- return std::unique_ptr<mvector<PrimRefMB>>();
- }
-
- /*! recursive build
-  *  Creates a large leaf when the depth or size threshold is reached. Otherwise the set is split
-  *  into up to cfg.branchingFactor children and, depending on the kind of splits performed, a
-  *  time split node, an aligned node or an unaligned node is created and its children are built
-  *  recursively.
-  *  \param current  build record (primitive set and depth) of the subtree to build
-  *  \param alloc    allocator to use; if it evaluates to false a new one is requested from createAlloc()
-  *  \param toplevel if true, progress is reported for sets of at most SINGLE_THREADED_THRESHOLD primitives
-  *  \return node record with the created node, its bounds and the time range of `current` */
- NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
- progressMonitor(current.size());
-
- /* create leaf node */
- if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
- current.prims.deterministic_order();
- return createLargeLeaf(current,alloc);
- }
-
- /* fill all children by always splitting the one with the largest surface area */
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildList children(current);
- bool aligned = true; // cleared by split() as soon as one unaligned split was performed
- bool timesplit = false; // set by split() as soon as one temporal split was performed
-
- do {
-
- /* find best child with largest bounding box area */
- ssize_t bestChild = -1;
- float bestArea = neg_inf;
- for (size_t i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.minLeafSize)
- continue;
-
- /* remember child with largest area */
- const float A = children[i].prims.halfArea();
- if (A > bestArea) {
- bestArea = children[i].prims.halfArea();
- bestChild = i;
- }
- }
- if (bestChild == -1) break; // every child is at or below cfg.minLeafSize
-
- /*! split best child into left and right child */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
- children.split(bestChild,left,right,std::move(new_vector));
-
- } while (children.size() < cfg.branchingFactor);
-
- /* detect time_ranges that have shrunken: a child whose time range is narrower than the parent's also counts as a time split */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- timesplit |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create time split node */
- if (timesplit)
- {
- const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true); // nullptr: the callee requests its own allocator through createAlloc()
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequential */
- else {
- for (size_t i=0; i<children.size(); i++) {
- values[i] = recurse(children[i],alloc,false);
- }
- }
-
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
-
- const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef); // bounds are recomputed from the primitives, the child bounds are not merged here
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
-
- /* create aligned node */
- else if (aligned)
- {
- const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true); // NOTE(review): `true` is passed although no time split was detected on this path (createLargeLeaf passes its timesplit flag here) - confirm this is intended
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- LBBox3fa cbounds[MAX_BRANCHING_FACTOR]; // per child copy of the bounds, merged after the parallel loop
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- cbounds[i] = values[i].lbounds;
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<children.size(); i++)
- bounds.extend(cbounds[i]);
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- /* ... continue sequentially */
- else
- {
- LBBox3fa bounds = empty;
- for (size_t i=0; i<children.size(); i++) {
- values[i] = recurse(children[i],alloc,false);
- bounds.extend(values[i].lbounds);
- }
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- }
-
- /* create unaligned node */
- else
- {
- const NodeRef node = createOBBNodeMB(alloc);
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
- const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space); // child bounds expressed in its own space
- const auto child = recurse(children[i],nullptr,true);
- setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range); // children are set one by one, values[] is not used on this path
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else
- {
- for (size_t i=0; i<children.size(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
- const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
- const auto child = recurse(children[i],alloc,false);
- setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
- }
- }
-
- const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef); // bounds of this node are recomputed from its primitives
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- }
-
- public:
-
- /*! Entry point into the builder: wraps the primitives described by pinfo
-  *  into the root build record (depth 1) and runs the recursive build on it.
-  *  \return the node record of the root */
- NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
- {
-   const SetMB all(pinfo,&prims);
-   BuildRecord top(all,1);
-   NodeRecordMB4D result = recurse(top,nullptr,true);
-   _mm_mfence(); // to allow non-temporal stores during build
-   return result;
- }
-
- private:
- Settings cfg;
- Scene* scene;
- const RecalculatePrimRef& recalculatePrimRef;
- const CreateAllocFunc& createAlloc;
- const CreateAABBNodeMBFunc& createAABBNodeMB;
- const SetAABBNodeMBFunc& setAABBNodeMB;
- const CreateOBBNodeMBFunc& createOBBNodeMB;
- const SetOBBNodeMBFunc& setOBBNodeMB;
- const CreateLeafFunc& createLeaf;
- const ProgressMonitor& progressMonitor;
-
- private:
- HeuristicBinning alignedHeuristic;
- UnalignedHeuristicBinning unalignedHeuristic;
- HeuristicTemporal temporalSplitHeuristic;
- };
-
- /*! Convenience entry point: instantiates a BuilderT for the given callback
-  *  types and runs it on the primitive array.
-  *  \return the node record returned by the builder for the root */
- template<typename NodeRef,
-          typename RecalculatePrimRef,
-          typename CreateAllocFunc,
-          typename CreateAABBNodeMBFunc,
-          typename SetAABBNodeMBFunc,
-          typename CreateOBBNodeMBFunc,
-          typename SetOBBNodeMBFunc,
-          typename CreateLeafFunc,
-          typename ProgressMonitor>
-
- static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
-                                          const RecalculatePrimRef& recalculatePrimRef,
-                                          const CreateAllocFunc& createAlloc,
-                                          const CreateAABBNodeMBFunc& createAABBNodeMB,
-                                          const SetAABBNodeMBFunc& setAABBNodeMB,
-                                          const CreateOBBNodeMBFunc& createOBBNodeMB,
-                                          const SetOBBNodeMBFunc& setOBBNodeMB,
-                                          const CreateLeafFunc& createLeaf,
-                                          const ProgressMonitor& progressMonitor,
-                                          const Settings settings)
- {
-   using Builder = BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
-                            CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
-                            CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
-                            CreateLeafFunc,ProgressMonitor>;
-
-   /* the builder only lives for the duration of this call */
-   Builder builder(scene,
-                   recalculatePrimRef,
-                   createAlloc,
-                   createAABBNodeMB,
-                   setAABBNodeMB,
-                   createOBBNodeMB,
-                   setOBBNodeMB,
-                   createLeaf,
-                   progressMonitor,
-                   settings);
-
-   return builder(prims,pinfo);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h
deleted file mode 100644
index 3f7e678a10..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h
+++ /dev/null
@@ -1,669 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning_array_aligned.h"
-#include "heuristic_spatial_array.h"
-#include "heuristic_openmerge_array.h"
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-# define NUM_OBJECT_BINS 16
-# define NUM_SPATIAL_BINS 16
-#else
-# define NUM_OBJECT_BINS 32
-# define NUM_SPATIAL_BINS 16
-#endif
-
-namespace embree
-{
- namespace isa
- {
- MAYBE_UNUSED static const float travCost = 1.0f;
- MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
-
- struct GeneralBVHBuilder
- {
- static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth
-
-
- /*! Settings for the SAH builder. All constructors start from the same
-  *  defaults for branchingFactor (2) and maxDepth (32); minLeafSize is clamped
-  *  to maxLeafSize by the two non-default constructors. */
- struct Settings
- {
-   /*! default settings */
-   Settings ()
-     : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
-       travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}
-
-   /*! initialize settings from API settings: starts from the defaults and
-    *  overrides every field that RTC_BUILD_ARGUMENTS_HAS reports as present */
-   Settings (const RTCBuildArguments& settings)
-     : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
-       travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
-   {
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(static_cast<size_t>(settings.sahBlockSize));
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;
-     if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;
-
-     /* a minimum above the maximum makes no sense, clamp it */
-     minLeafSize = min(minLeafSize,maxLeafSize);
-   }
-
-   /*! initialize settings from explicit values; branchingFactor and maxDepth keep their defaults */
-   Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
-     : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
-       travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
-   {
-     /* the parameters hide the members of the same name inside this body, so
-      * the member has to be addressed through this-> for the clamp to stick */
-     this->minLeafSize = min(minLeafSize,maxLeafSize);
-   }
-
- public:
-   size_t branchingFactor; //!< branching factor of BVH to build
-   size_t maxDepth; //!< maximum depth of BVH to build
-   size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
-   size_t minLeafSize; //!< minimum size of a leaf
-   size_t maxLeafSize; //!< maximum size of a leaf
-   float travCost; //!< estimated cost of one traversal step
-   float intCost; //!< estimated cost of one primitive intersection
-   size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
-   size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
- };
-
- /*! Recursive state of the builder: the primitive set of a subtree together
-  *  with the depth of its root. Records are ordered by their primitive count. */
- template<typename Set, typename Split>
- struct BuildRecordT
- {
- public:
-   /*! leaves all members uninitialized */
-   __forceinline BuildRecordT ()
-   {
-   }
-
-   /*! record with an empty primitive set at the given depth */
-   __forceinline BuildRecordT (size_t depth)
-     : depth(depth), alloc_barrier(false), prims(empty)
-   {
-   }
-
-   /*! record for the given primitive set at the given depth */
-   __forceinline BuildRecordT (size_t depth, const Set& prims)
-     : depth(depth), alloc_barrier(false), prims(prims)
-   {
-   }
-
-   /*! number of primitives in the set */
-   __forceinline size_t size() const
-   {
-     return prims.size();
-   }
-
-   /*! geometry bounds of the primitive set */
-   __forceinline BBox3fa bounds() const
-   {
-     return prims.geomBounds;
-   }
-
-   /*! compares two records by the number of primitives they hold */
-   __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b)
-   {
-     return a.size() < b.size();
-   }
-
-   __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b)
-   {
-     return a.size() > b.size();
-   }
-
- public:
-   size_t depth;       //!< Depth of the root of this subtree.
-   bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
-   Set prims;          //!< The list of primitives.
- };
-
- /*! Default leaf predicate: every primitive set is accepted as a leaf. */
- template<typename PrimRef, typename Set>
- struct DefaultCanCreateLeafFunc
- {
-   __forceinline bool operator()(const PrimRef* /*prims*/, const Set& /*set*/) const
-   {
-     return true;
-   }
- };
-
- /*! Default split functor that goes with DefaultCanCreateLeafFunc: it does
-  *  nothing and leaves both output sets untouched. */
- template<typename PrimRef, typename Set>
- struct DefaultCanCreateLeafSplitFunc
- {
-   __forceinline void operator()(PrimRef* /*prims*/, const Set& /*set*/, Set& /*lset*/, Set& /*rset*/) const
-   {
-   }
- };
-
- template<typename BuildRecord,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename ReductionTy,
- typename Allocator,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- friend struct GeneralBVHBuilder;
-
- BuilderT (PrimRef* prims, //!< primitive array, handed to createLeaf and canCreateLeaf together with a primitive set
- Heuristic& heuristic, //!< split heuristic used to find and perform splits
- const CreateAllocFunc& createAlloc, //!< called to obtain an allocator when recurse() is entered without one
- const CreateNodeFunc& createNode, //!< creates an inner node for an array of child build records
- const UpdateNodeFunc& updateNode, //!< reduces the values of the built children into the value of the node
- const CreateLeafFunc& createLeaf, //!< creates a leaf from a primitive set
- const CanCreateLeafFunc& canCreateLeaf, //!< tells whether a primitive set may be stored as one leaf
- const CanCreateLeafSplitFunc& canCreateLeafSplit, //!< splits a set that canCreateLeaf rejected
- const ProgressMonitor& progressMonitor, //!< called with the number of primitives of finished top level subtrees
- const Settings& settings) : //!< builder settings, stored in cfg
- cfg(settings),
- prims(prims),
- heuristic(heuristic),
- createAlloc(createAlloc),
- createNode(createNode),
- updateNode(updateNode),
- createLeaf(createLeaf),
- canCreateLeaf(canCreateLeaf),
- canCreateLeafSplit(canCreateLeafSplit),
- progressMonitor(progressMonitor)
- {
- if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) // the fixed size child arrays of the build functions hold MAX_BRANCHING_FACTOR entries
- throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
- }
-
- const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
- { /*! Creates a leaf for the primitives of `current`, or a small subtree of leaves when they do not fit into one.
-    *  A set of at most cfg.maxLeafSize primitives that canCreateLeaf accepts is passed to createLeaf directly.
-    *  Otherwise the set is split into up to cfg.branchingFactor children that are processed recursively.
-    *  \param current build record of the primitives to store
-    *  \param alloc   allocator forwarded to createLeaf and createNode
-    *  \return the value of createLeaf, or the reduction computed by updateNode for the created node */
- /* this should never occur but is a fatal error */
- if (current.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives */
- if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
- return createLeaf(prims,current.prims,alloc);
-
- /* fill all children by always splitting the largest one */
- ReductionTy values[MAX_BRANCHING_FACTOR];
- BuildRecord children[MAX_BRANCHING_FACTOR];
- size_t numChildren = 1;
- children[0] = current;
- do {
-
- /* find the child with the most primitives among those that cannot be stored as a single leaf */
- size_t bestChild = -1; // (size_t)-1 marks "no child found"
- size_t bestSize = 0;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
- continue;
-
- /* remember child with largest size */
- if (children[i].prims.size() > bestSize) {
- bestSize = children[i].prims.size();
- bestChild = i;
- }
- }
- if (bestChild == (size_t)-1) break; // all children can be stored as leaves
-
- /*! split best child into left and right child: through canCreateLeafSplit if it was rejected as a leaf, through the fallback split of the heuristic otherwise */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- if (!canCreateLeaf(prims,children[bestChild].prims)) {
- canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
- } else {
- heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
- }
-
- /* add new children left and right: the last child moves into the freed slot, left and right go to the end */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- /* set barrier for primrefarrayalloc: when this set exceeds the threshold, mark the children that do not */
- if (unlikely(current.size() > cfg.primrefarrayalloc))
- for (size_t i=0; i<numChildren; i++)
- children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
-
- /* create node */
- auto node = createNode(children,numChildren,alloc);
-
- /* recurse into each child and perform reduction */
- for (size_t i=0; i<numChildren; i++)
- values[i] = createLargeLeaf(children[i],alloc);
-
- /* perform reduction */
- return updateNode(current,children,node,values,numChildren);
- }
-
- const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= cfg.singleThreadThreshold)
- progressMonitor(current.size());
-
- /*! find best split */
- auto split = heuristic.find(current.prims,cfg.logBlockSize);
-
- /*! compute leaf and split cost */
- const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
- const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
- assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
-
- /*! create a leaf node when threshold reached or SAH tells us to stop */
- if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
- heuristic.deterministic_order(current.prims);
- return createLargeLeaf(current,alloc);
- }
-
- /*! perform initial split */
- Set lprims,rprims;
- heuristic.split(split,current.prims,lprims,rprims);
-
- /*! initialize child list with initial split */
- ReductionTy values[MAX_BRANCHING_FACTOR];
- BuildRecord children[MAX_BRANCHING_FACTOR];
- children[0] = BuildRecord(current.depth+1,lprims);
- children[1] = BuildRecord(current.depth+1,rprims);
- size_t numChildren = 2;
-
- /*! split until node is full or SAH tells us to stop */
- while (numChildren < cfg.branchingFactor)
- {
- /*! find best child to split */
- float bestArea = neg_inf;
- ssize_t bestChild = -1;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].prims.size() <= cfg.minLeafSize) continue;
-
- /* find child with largest surface area */
- if (halfArea(children[i].prims.geomBounds) > bestArea) {
- bestChild = i;
- bestArea = halfArea(children[i].prims.geomBounds);
- }
- }
- if (bestChild == -1) break;
-
- /* perform best found split */
- BuildRecord& brecord = children[bestChild];
- BuildRecord lrecord(current.depth+1);
- BuildRecord rrecord(current.depth+1);
- auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
- heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
- children[bestChild ] = lrecord;
- children[numChildren] = rrecord;
- numChildren++;
- }
-
- /* set barrier for primrefarrayalloc */
- if (unlikely(current.size() > cfg.primrefarrayalloc))
- for (size_t i=0; i<numChildren; i++)
- children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
-
- /* sort buildrecords for faster shadow ray traversal */
- std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());
-
- /*! create an inner node */
- auto node = createNode(children,numChildren,alloc);
-
- /* spawn tasks */
- if (current.size() > cfg.singleThreadThreshold)
- {
- /*! parallel_for is faster than spawing sub-tasks */
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- return updateNode(current,children,node,values,numChildren);
- }
- /* recurse into each child */
- else
- {
- for (size_t i=0; i<numChildren; i++)
- values[i] = recurse(children[i],alloc,false);
-
- return updateNode(current,children,node,values,numChildren);
- }
- }
-
- private:
- Settings cfg;
- PrimRef* prims;
- Heuristic& heuristic;
- const CreateAllocFunc& createAlloc;
- const CreateNodeFunc& createNode;
- const UpdateNodeFunc& updateNode;
- const CreateLeafFunc& createLeaf;
- const CanCreateLeafFunc& canCreateLeaf;
- const CanCreateLeafSplitFunc& canCreateLeafSplit;
- const ProgressMonitor& progressMonitor;
- };
-
- template<
- typename ReductionTy,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- __noinline static ReductionTy build(Heuristic& heuristic,
- PrimRef* prims,
- const Set& set,
- CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const Settings& settings)
- {
- typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
-
- typedef BuilderT<
- BuildRecord,
- Heuristic,
- Set,
- PrimRef,
- ReductionTy,
- decltype(createAlloc()),
- CreateAllocFunc,
- CreateNodeFunc,
- UpdateNodeFunc,
- CreateLeafFunc,
- DefaultCanCreateLeafFunc<PrimRef, Set>,
- DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
- ProgressMonitor> Builder;
-
- /* instantiate builder */
- Builder builder(prims,
- heuristic,
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- DefaultCanCreateLeafFunc<PrimRef, Set>(),
- DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
- progressMonitor,
- settings);
-
- /* build hierarchy */
- BuildRecord record(1,set);
- const ReductionTy root = builder.recurse(record,nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
-
- template<
- typename ReductionTy,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- __noinline static ReductionTy build(Heuristic& heuristic,
- PrimRef* prims,
- const Set& set,
- CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const CanCreateLeafFunc& canCreateLeaf,
- const CanCreateLeafSplitFunc& canCreateLeafSplit,
- const ProgressMonitor& progressMonitor,
- const Settings& settings)
- {
- typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
-
- typedef BuilderT<
- BuildRecord,
- Heuristic,
- Set,
- PrimRef,
- ReductionTy,
- decltype(createAlloc()),
- CreateAllocFunc,
- CreateNodeFunc,
- UpdateNodeFunc,
- CreateLeafFunc,
- CanCreateLeafFunc,
- CanCreateLeafSplitFunc,
- ProgressMonitor> Builder;
-
- /* instantiate builder */
- Builder builder(prims,
- heuristic,
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- canCreateLeaf,
- canCreateLeafSplit,
- progressMonitor,
- settings);
-
- /* build hierarchy */
- BuildRecord record(1,set);
- const ReductionTy root = builder.recurse(record,nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
- };
-
- /* SAH builder that operates on an array of BuildRecords */
- struct BVHBuilderBinnedSAH
- {
- typedef PrimInfoRange Set;
- typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
- typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- PrimRef* prims, const PrimInfo& pinfo,
- const Settings& settings)
- {
- Heuristic heuristic(prims);
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoRange(0,pinfo.size(),pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- progressMonitor,
- settings);
- }
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const CanCreateLeafFunc& canCreateLeaf,
- const CanCreateLeafSplitFunc& canCreateLeafSplit,
- const ProgressMonitor& progressMonitor,
- PrimRef* prims, const PrimInfo& pinfo,
- const Settings& settings)
- {
- Heuristic heuristic(prims);
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoRange(0,pinfo.size(),pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- canCreateLeaf,
- canCreateLeafSplit,
- progressMonitor,
- settings);
- }
- };
-
- /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */
- struct BVHBuilderBinnedFastSpatialSAH
- {
- typedef PrimInfoExtRange Set;
- typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
- typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
- static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
- template<typename ReductionTy, typename UserCreateLeaf>
- struct CreateLeafExt
- {
- __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
- : userCreateLeaf(userCreateLeaf) {}
-
- // __noinline is workaround for ICC2016 compiler bug
- template<typename Allocator>
- __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
- {
- for (size_t i=range.begin(); i<range.end(); i++)
- prims[i].lower.u &= GEOMID_MASK;
-
- return userCreateLeaf(prims,range,alloc);
- }
-
- const UserCreateLeaf userCreateLeaf;
- };
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename SplitPrimitiveFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode,
- UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- SplitPrimitiveFunc splitPrimitive,
- ProgressMonitor progressMonitor,
- PrimRef* prims,
- const size_t extSize,
- const PrimInfo& pinfo,
- const Settings& settings)
- {
- typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
- Heuristic heuristic(splitPrimitive,prims,pinfo);
-
- /* calculate total surface area */ // FIXME: this sum is not deterministic
- const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {
-
- double A = 0.0f;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- PrimRef& prim = prims[i];
- A += area(prim.bounds());
- }
- return A;
- },std::plus<double>());
-
-
- /* calculate maximum number of spatial splits per primitive */
- const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
- const float f = 10.0f;
-
- const float invA = 1.0f / A;
- parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {
-
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- PrimRef& prim = prims[i];
- assert((prim.geomID() & SPLITS_MASK) == 0);
- // FIXME: is there a better general heuristic ?
- const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
- unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
- prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
- }
- });
-
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
- createAlloc,
- createNode,
- updateNode,
- CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),
- progressMonitor,
- settings);
- }
- };
-
- /* Open/Merge SAH builder that operates on an array of BuildRecords */
- struct BVHBuilderBinnedOpenMergeSAH
- {
- static const size_t NUM_OBJECT_BINS_HQ = 32;
- typedef PrimInfoExtRange Set;
- typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
- typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename BuildRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename NodeOpenerFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode,
- UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- NodeOpenerFunc nodeOpenerFunc,
- ProgressMonitor progressMonitor,
- BuildRef* prims,
- const size_t extSize,
- const PrimInfo& pinfo,
- const Settings& settings)
- {
- typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
- Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);
-
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
- heuristic,
- prims,
- PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- progressMonitor,
- settings);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h
deleted file mode 100644
index a4d3b68e46..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h
+++ /dev/null
@@ -1,972 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "priminfo.h"
-#include "../../common/algorithms/parallel_reduce.h"
-#include "../../common/algorithms/parallel_partition.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! mapping into bins */
- template<size_t BINS>
- struct BinMapping
- {
- public:
- __forceinline BinMapping() {}
-
- /*! calculates the mapping */
- __forceinline BinMapping(size_t N, const BBox3fa& centBounds)
- {
- num = min(BINS,size_t(4.0f + 0.05f*N));
- assert(num >= 1);
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) centBounds.lower;
- }
-
- /*! calculates the mapping */
- __forceinline BinMapping(const BBox3fa& centBounds)
- {
- num = BINS;
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) centBounds.lower;
- }
-
- /*! calculates the mapping */
- template<typename PrimInfo>
- __forceinline BinMapping(const PrimInfo& pinfo)
- {
- const vfloat4 eps = 1E-34f;
- num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
- const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) pinfo.centBounds.lower;
- }
-
- /*! returns number of bins */
- __forceinline size_t size() const { return num; }
-
- /*! slower but safe binning */
- __forceinline Vec3ia bin(const Vec3fa& p) const
- {
- const vint4 i = floori((vfloat4(p)-ofs)*scale);
-#if 1
- assert(i[0] >= 0 && (size_t)i[0] < num);
- assert(i[1] >= 0 && (size_t)i[1] < num);
- assert(i[2] >= 0 && (size_t)i[2] < num);
- return Vec3ia(i);
-#else
- return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
-#endif
- }
-
- /*! faster but unsafe binning */
- __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
- return Vec3ia(floori((vfloat4(p)-ofs)*scale));
- }
-
- /*! faster but unsafe binning */
- template<typename PrimRef>
- __forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
- return bin_unsafe(p.binCenter());
- }
-
- /*! faster but unsafe binning */
- template<typename PrimRef, typename BinBoundsAndCenter>
- __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
- return bin_unsafe(binBoundsAndCenter.binCenter(p));
- }
-
- template<typename PrimRef>
- __forceinline bool bin_unsafe(const PrimRef& ref,
- const vint4& vSplitPos,
- const vbool4& splitDimMask) const // FIXME: rename to isLeft
- {
- return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
- }
- /*! calculates left spatial position of bin */
- __forceinline float pos(const size_t bin, const size_t dim) const {
- return madd(float(bin),1.0f / scale[dim],ofs[dim]);
- }
-
- /*! returns true if the mapping is invalid in some dimension */
- __forceinline bool invalid(const size_t dim) const {
- return scale[dim] == 0.0f;
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
- return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
- }
-
- public:
- size_t num;
- vfloat4 ofs,scale; //!< linear function that maps to bin ID
- };
-
- /*! stores all information to perform some split */
- template<size_t BINS>
- struct BinSplit
- {
- enum
- {
- SPLIT_OBJECT = 0,
- SPLIT_FALLBACK = 1,
- SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
- SPLIT_TEMPORAL = 2,
- SPLIT_GEOMID = 3,
- };
-
- /*! construct an invalid split by default */
- __forceinline BinSplit()
- : sah(inf), dim(-1), pos(0), data(0) {}
-
- __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
- : sah(sah), dim(dim), fpos(fpos), data(data) {}
-
- /*! constructs specified split */
- __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
-
- /*! tests if this split is valid */
- __forceinline bool valid() const { return dim != -1; }
-
- /*! calculates surface area heuristic for performing the split */
- __forceinline float splitSAH() const { return sah; }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
- return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
- }
-
- public:
- float sah; //!< SAH cost of the split
- int dim; //!< split dimension
- union { int pos; float fpos; }; //!< bin index for splitting
- unsigned int data; //!< extra optional split data
- BinMapping<BINS> mapping; //!< mapping into bins
- };
-
- /*! stores extended information about the split */
- template<typename BBox>
- struct SplitInfoT
- {
-
- __forceinline SplitInfoT () {}
-
- __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
- : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
-
- public:
- size_t leftCount,rightCount;
- BBox leftBounds,rightBounds;
- };
-
- typedef SplitInfoT<BBox3fa> SplitInfo;
- typedef SplitInfoT<LBBox3fa> SplitInfo2;
-
- /*! stores all binning information */
- template<size_t BINS, typename PrimRef, typename BBox>
- struct __aligned(64) BinInfoT
- {
- typedef BinSplit<BINS> Split;
- typedef vbool4 vbool;
- typedef vint4 vint;
- typedef vfloat4 vfloat;
-
- __forceinline BinInfoT() {
- }
-
- __forceinline BinInfoT(EmptyTy) {
- clear();
- }
-
- /*! bin access function */
- __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
- __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
-
- __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
- __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
-
- __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
- __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
-
- /*! clears the bin info */
- __forceinline void clear()
- {
- for (size_t i=0; i<BINS; i++) {
- bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
- counts(i) = vuint4(zero);
- }
- }
-
- /*! bins an array of primitives */
- __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
- {
- if (unlikely(N == 0)) return;
- size_t i;
- for (i=0; i<N-1; i+=2)
- {
- /*! map even and odd primitive to bin */
- BBox prim0; Vec3fa center0;
- prims[i+0].binBoundsAndCenter(prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- BBox prim1; Vec3fa center1;
- prims[i+1].binBoundsAndCenter(prim1,center1);
- const vint4 bin1 = (vint4)mapping.bin(center1);
-
- /*! increase bounds for bins for even primitive */
- const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
- const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
- const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
- const unsigned int s0 = (unsigned int)prims[i+0].size();
- counts(b00,0)+=s0;
- counts(b01,1)+=s0;
- counts(b02,2)+=s0;
-
- /*! increase bounds of bins for odd primitive */
- const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
- const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
- const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
- const unsigned int s1 = (unsigned int)prims[i+1].size();
- counts(b10,0)+=s1;
- counts(b11,1)+=s1;
- counts(b12,2)+=s1;
- }
- /*! for uneven number of primitives */
- if (i < N)
- {
- /*! map primitive to bin */
- BBox prim0; Vec3fa center0;
- prims[i].binBoundsAndCenter(prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- /*! increase bounds of bins */
- const unsigned int s0 = (unsigned int)prims[i].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
- }
- }
-
- /*! bins an array of primitives */
- template<typename BinBoundsAndCenter>
- __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
- if (N == 0) return;
-
- size_t i;
- for (i=0; i<N-1; i+=2)
- {
- /*! map even and odd primitive to bin */
- BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
- BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
- const vint4 bin1 = (vint4)mapping.bin(center1);
-
- /*! increase bounds for bins for even primitive */
- const unsigned int s0 = prims[i+0].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
-
- /*! increase bounds of bins for odd primitive */
- const unsigned int s1 = prims[i+1].size();
- const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
- const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
- const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
- }
-
- /*! for uneven number of primitives */
- if (i < N)
- {
- /*! map primitive to bin */
- BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- /*! increase bounds of bins */
- const unsigned int s0 = prims[i+0].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
- }
- }
-
- __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
- bin(prims+begin,end-begin,mapping);
- }
-
- template<typename BinBoundsAndCenter>
- __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
- bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
- }
-
- /*! merges in other binning information */
- __forceinline void merge (const BinInfoT& other, size_t numBins)
- {
-
- for (size_t i=0; i<numBins; i++)
- {
- counts(i) += other.counts(i);
- bounds(i,0).extend(other.bounds(i,0));
- bounds(i,1).extend(other.bounds(i,1));
- bounds(i,2).extend(other.bounds(i,2));
- }
- }
-
- /*! reduces binning information */
- static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
- {
- BinInfoT c;
- for (size_t i=0; i<numBins; i++)
- {
- c.counts(i) = a.counts(i)+b.counts(i);
- c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
- c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
- c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
- }
- return c;
- }
-
- /*! finds the best split by scanning binning information */
- __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
- {
- /* sweep from right to left and compute parallel prefix of merged bounds */
- vfloat4 rAreas[BINS];
- vuint4 rCounts[BINS];
- vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
- for (size_t i=mapping.size()-1; i>0; i--)
- {
- count += counts(i);
- rCounts[i] = count;
- bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
- by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
- bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
- rAreas[i][3] = 0.0f;
- }
- /* sweep from left to right and compute SAH */
- vuint4 blocks_add = (1 << blocks_shift)-1;
- vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
- count = 0; bx = empty; by = empty; bz = empty;
- for (size_t i=1; i<mapping.size(); i++, ii+=1)
- {
- count += counts(i-1);
- bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
- by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
- bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
- const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
- const vfloat4 rArea = rAreas[i];
- const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.
- const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
- const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
- //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
-
- vbestPos = select(sah < vbestSAH,ii ,vbestPos);
- vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
- }
-
- /* find best dimension */
- float bestSAH = inf;
- int bestDim = -1;
- int bestPos = 0;
- for (int dim=0; dim<3; dim++)
- {
- /* ignore zero sized dimensions */
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- /* test if this is a better dimension */
- if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
- bestDim = dim;
- bestPos = vbestPos[dim];
- bestSAH = vbestSAH[dim];
- }
- }
- return Split(bestSAH,bestDim,bestPos,mapping);
- }
-
- /*! calculates extended split information */
- __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
- {
- if (split.dim == -1) {
- new (&info) SplitInfoT<BBox>(0,empty,0,empty);
- return;
- }
-
- size_t leftCount = 0;
- BBox leftBounds = empty;
- for (size_t i=0; i<(size_t)split.pos; i++) {
- leftCount += counts(i,split.dim);
- leftBounds.extend(bounds(i,split.dim));
- }
- size_t rightCount = 0;
- BBox rightBounds = empty;
- for (size_t i=split.pos; i<mapping.size(); i++) {
- rightCount += counts(i,split.dim);
- rightBounds.extend(bounds(i,split.dim));
- }
- new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
- }
-
- /*! gets the number of primitives left of the split */
- __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t leftCount = 0;
- for (size_t i = 0; i < (size_t)split.pos; i++) {
- leftCount += counts(i, split.dim);
- }
- return leftCount;
- }
-
- /*! gets the number of primitives right of the split */
- __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t rightCount = 0;
- for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
- rightCount += counts(i, split.dim);
- }
- return rightCount;
- }
-
- private:
- BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
- vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! mapping into bins */
- template<>
- struct BinMapping<16>
- {
- public:
- __forceinline BinMapping() {}
-
- /*! calculates the mapping */
- template<typename PrimInfo>
- __forceinline BinMapping(const PrimInfo& pinfo)
- {
- num = 16;
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) pinfo.centBounds.lower;
- scale16 = scale;
- ofs16 = ofs;
- }
-
- /*! returns number of bins */
- __forceinline size_t size() const { return num; }
-
- __forceinline vint16 bin16(const Vec3fa& p) const {
- return vint16(vint4(floori((vfloat4(p)-ofs)*scale)));
- }
-
- __forceinline vint16 bin16(const vfloat16& p) const {
- return floori((p-ofs16)*scale16);
- }
-
- __forceinline int bin_unsafe(const PrimRef& ref,
- const vint16& vSplitPos,
- const vbool16& splitDimMask) const // FIXME: rename to isLeft
- {
- const vfloat16 lower(*(vfloat4*)&ref.lower);
- const vfloat16 upper(*(vfloat4*)&ref.upper);
- const vfloat16 p = lower + upper;
- const vint16 i = floori((p-ofs16)*scale16);
- return lt(splitDimMask,i,vSplitPos);
- }
-
- /*! returns true if the mapping is invalid in some dimension */
- __forceinline bool invalid(const size_t dim) const {
- return scale[dim] == 0.0f;
- }
-
- public:
- size_t num;
- vfloat4 ofs,scale; //!< linear function that maps to bin ID
- vfloat16 ofs16,scale16; //!< linear function that maps to bin ID
- };
-
- /*! 16 bins in-register binner.
-  *
-  *  For each of the three binning dimensions this stores the per-bin lower
-  *  and upper bounds and the per-bin primitive count in 16-wide vectors
-  *  (one lane per bin). */
- template<typename PrimRef>
- struct __aligned(64) BinInfoT<16,PrimRef,BBox3fa>
- {
-   typedef BinSplit<16> Split;
-   typedef vbool16 vbool;
-   typedef vint16 vint;
-   typedef vfloat16 vfloat;
-
-   /*! leaves the bin info uninitialized */
-   __forceinline BinInfoT() {
-   }
-
-   /*! creates cleared bin info */
-   __forceinline BinInfoT(EmptyTy) {
-     clear();
-   }
-
-   /*! clears the bin info */
-   __forceinline void clear()
-   {
-     lower[0] = lower[1] = lower[2] = pos_inf;
-     upper[0] = upper[1] = upper[2] = neg_inf;
-     count[0] = count[1] = count[2] = 0;
-   }
-
-   /*! per-lane area term of the bounds obtained from reverse_prefix_min/reverse_prefix_max */
-   static __forceinline vfloat16 prefix_area_rl(const vfloat16 min_x,
-                                                const vfloat16 min_y,
-                                                const vfloat16 min_z,
-                                                const vfloat16 max_x,
-                                                const vfloat16 max_y,
-                                                const vfloat16 max_z)
-   {
-     const vfloat16 r_min_x = reverse_prefix_min(min_x);
-     const vfloat16 r_min_y = reverse_prefix_min(min_y);
-     const vfloat16 r_min_z = reverse_prefix_min(min_z);
-     const vfloat16 r_max_x = reverse_prefix_max(max_x);
-     const vfloat16 r_max_y = reverse_prefix_max(max_y);
-     const vfloat16 r_max_z = reverse_prefix_max(max_z);
-     const vfloat16 dx = r_max_x - r_min_x;
-     const vfloat16 dy = r_max_y - r_min_y;
-     const vfloat16 dz = r_max_z - r_min_z;
-     const vfloat16 area_rl = madd(dx,dy,madd(dx,dz,dy*dz));
-     return area_rl;
-   }
-
-   /*! per-lane area term of the bounds obtained from prefix_min/prefix_max */
-   static __forceinline vfloat16 prefix_area_lr(const vfloat16 min_x,
-                                                const vfloat16 min_y,
-                                                const vfloat16 min_z,
-                                                const vfloat16 max_x,
-                                                const vfloat16 max_y,
-                                                const vfloat16 max_z)
-   {
-     const vfloat16 r_min_x = prefix_min(min_x);
-     const vfloat16 r_min_y = prefix_min(min_y);
-     const vfloat16 r_min_z = prefix_min(min_z);
-     const vfloat16 r_max_x = prefix_max(max_x);
-     const vfloat16 r_max_y = prefix_max(max_y);
-     const vfloat16 r_max_z = prefix_max(max_z);
-     const vfloat16 dx = r_max_x - r_min_x;
-     const vfloat16 dy = r_max_y - r_min_y;
-     const vfloat16 dz = r_max_z - r_min_z;
-     const vfloat16 area_lr = madd(dx,dy,madd(dx,dz,dy*dz));
-     return area_lr;
-   }
-
-   /*! bins an array of primitives.
-    *
-    *  The result replaces the stored bin info (it is not merged with what was
-    *  stored before). For N == 0 the stored bin info is left untouched. */
-   __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<16>& mapping)
-   {
-     if (unlikely(N == 0)) return;
-
-     const vfloat16 init_min(pos_inf);
-     const vfloat16 init_max(neg_inf);
-
-     /* accumulators; the suffix 0/1/2 is the binning dimension */
-     vfloat16 min_x0 = init_min, min_x1 = init_min, min_x2 = init_min;
-     vfloat16 min_y0 = init_min, min_y1 = init_min, min_y2 = init_min;
-     vfloat16 min_z0 = init_min, min_z1 = init_min, min_z2 = init_min;
-     vfloat16 max_x0 = init_max, max_x1 = init_max, max_x2 = init_max;
-     vfloat16 max_y0 = init_max, max_y1 = init_max, max_y2 = init_max;
-     vfloat16 max_z0 = init_max, max_z1 = init_max, max_z2 = init_max;
-     vuint16 count0,count1,count2;
-     count0 = zero;
-     count1 = zero;
-     count2 = zero;
-
-     const vint16 step16(step);
-
-     /* maps a primitive to its bin IDs, using lower+upper of its bounds as center */
-     auto mapToBin = [&] (const PrimRef& prim) -> vint16
-     {
-       const BBox3fa bounds = prim.bounds();
-       const vfloat16 center = vfloat16((vfloat4)bounds.lower) + vfloat16((vfloat4)bounds.upper);
-       return mapping.bin16(center);
-     };
-
-     /* extends, for each dimension, the bounds and count of the one lane selected by the bin ID */
-     auto accumulate = [&] (const PrimRef& prim, const vint16& bin)
-     {
-       const vfloat16 b_min_x = prim.lower.x;
-       const vfloat16 b_min_y = prim.lower.y;
-       const vfloat16 b_min_z = prim.lower.z;
-       const vfloat16 b_max_x = prim.upper.x;
-       const vfloat16 b_max_y = prim.upper.y;
-       const vfloat16 b_max_z = prim.upper.z;
-
-       const vint16 bin0 = shuffle<0>(bin);
-       const vint16 bin1 = shuffle<1>(bin);
-       const vint16 bin2 = shuffle<2>(bin);
-
-       const vbool16 m_update_x = step16 == bin0;
-       const vbool16 m_update_y = step16 == bin1;
-       const vbool16 m_update_z = step16 == bin2;
-
-       assert(popcnt((size_t)m_update_x) == 1);
-       assert(popcnt((size_t)m_update_y) == 1);
-       assert(popcnt((size_t)m_update_z) == 1);
-
-       min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x);
-       min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y);
-       min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z);
-       // ------------------------------------------------------------------------
-       max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x);
-       max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y);
-       max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z);
-       // ------------------------------------------------------------------------
-       min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x);
-       min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y);
-       min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z);
-       // ------------------------------------------------------------------------
-       max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x);
-       max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y);
-       max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z);
-       // ------------------------------------------------------------------------
-       min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x);
-       min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y);
-       min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z);
-       // ------------------------------------------------------------------------
-       max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x);
-       max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y);
-       max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z);
-       // ------------------------------------------------------------------------
-       count0 = mask_add(m_update_x,count0,count0,vuint16(1));
-       count1 = mask_add(m_update_y,count1,count1,vuint16(1));
-       count2 = mask_add(m_update_z,count2,count2,vuint16(1));
-     };
-
-     /* process two primitives per iteration; both are mapped before either is accumulated */
-     size_t i;
-     for (i=0; i<N-1; i+=2)
-     {
-       const vint16 binA = mapToBin(prims[i+0]);
-       const vint16 binB = mapToBin(prims[i+1]);
-       accumulate(prims[i+0],binA);
-       accumulate(prims[i+1],binB);
-     }
-
-     /* remaining primitive when N is odd */
-     if (i < N)
-       accumulate(prims[i],mapToBin(prims[i]));
-
-     lower[0] = Vec3vf16( min_x0, min_y0, min_z0 );
-     lower[1] = Vec3vf16( min_x1, min_y1, min_z1 );
-     lower[2] = Vec3vf16( min_x2, min_y2, min_z2 );
-
-     upper[0] = Vec3vf16( max_x0, max_y0, max_z0 );
-     upper[1] = Vec3vf16( max_x1, max_y1, max_z1 );
-     upper[2] = Vec3vf16( max_x2, max_y2, max_z2 );
-
-     count[0] = count0;
-     count[1] = count1;
-     count[2] = count2;
-   }
-
-   /*! bins the primitives prims[begin,end) */
-   __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<16>& mapping) {
-     bin(prims+begin,end-begin,mapping);
-   }
-
-   /*! merges in other binning information (numBins is not used by this specialization) */
-   __forceinline void merge (const BinInfoT& other, size_t numBins)
-   {
-     for (size_t i=0; i<3; i++)
-     {
-       lower[i] = min(lower[i],other.lower[i]);
-       upper[i] = max(upper[i],other.upper[i]);
-       count[i] += other.count[i];
-     }
-   }
-
-   /*! reduces binning information */
-   static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b)
-   {
-     BinInfoT c;
-     for (size_t i=0; i<3; i++)
-     {
-       /* the member is named 'count'; the former 'counts' did not exist in this specialization */
-       c.count[i] = a.count[i] + b.count[i];
-       c.lower[i] = min(a.lower[i],b.lower[i]);
-       c.upper[i] = max(a.upper[i],b.upper[i]);
-     }
-     return c;
-   }
-
-   /*! finds the best split by scanning binning information.
-    *
-    *  Returns a split with dim == -1 when no dimension yields a finite SAH. */
-   __forceinline Split best(const BinMapping<16>& mapping, const size_t blocks_shift) const
-   {
-     /* find best dimension */
-     float bestSAH = inf;
-     int bestDim = -1;
-     int bestPos = 0;
-     const vuint16 blocks_add = (1 << blocks_shift)-1;
-     const vfloat16 vinf(pos_inf);
-     for (size_t dim=0; dim<3; dim++)
-     {
-       /* ignore zero sized dimensions */
-       if (unlikely(mapping.invalid(dim)))
-         continue;
-
-       const vfloat16 rArea16 = prefix_area_rl(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z);
-       const vfloat16 lArea16 = prefix_area_lr(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z);
-       const vuint16 lCount16 = prefix_sum(count[dim]);
-       const vuint16 rCount16 = reverse_prefix_sum(count[dim]);
-
-       /* compute best split in this dimension */
-       const vfloat16 leftArea = lArea16;
-       const vfloat16 rightArea = align_shift_right<1>(zero,rArea16);
-       const vuint16 lC = lCount16;
-       const vuint16 rC = align_shift_right<1>(zero,rCount16);
-       const vuint16 leftCount = ( lC + blocks_add) >> blocks_shift;
-       const vuint16 rightCount = ( rC + blocks_add) >> blocks_shift;
-       /* lane 15 is masked out, so the split position index+1 below is at most 15 */
-       const vbool16 valid = (leftArea < vinf) & (rightArea < vinf) & vbool16(0x7fff); // handles inf entries
-       const vfloat16 sah = select(valid,madd(leftArea,vfloat16(leftCount),rightArea*vfloat16(rightCount)),vfloat16(pos_inf));
-       /* test if this is a better dimension */
-       if (any(sah < vfloat16(bestSAH)))
-       {
-         const size_t index = select_min(sah);
-         assert(index < 15);
-         assert(sah[index] < bestSAH);
-         bestDim = dim;
-         bestPos = index+1;
-         bestSAH = sah[index];
-       }
-     }
-
-     return Split(bestSAH,bestDim,bestPos,mapping);
-   }
-
-   /*! calculates extended split information */
-   __forceinline void getSplitInfo(const BinMapping<16>& mapping, const Split& split, SplitInfo& info) const
-   {
-     if (split.dim == -1) {
-       new (&info) SplitInfo(0,empty,0,empty);
-       return;
-     }
-     // FIXME: horizontal reduction!
-
-     /* bins [0,split.pos) form the left side */
-     size_t leftCount = 0;
-     BBox3fa leftBounds = empty;
-     for (size_t i=0; i<(size_t)split.pos; i++) {
-       leftCount += count[split.dim][i];
-       Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]);
-       Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]);
-       leftBounds.extend(BBox3fa(bounds_lower,bounds_upper));
-     }
-     /* bins [split.pos,mapping.size()) form the right side */
-     size_t rightCount = 0;
-     BBox3fa rightBounds = empty;
-     for (size_t i=split.pos; i<mapping.size(); i++) {
-       rightCount += count[split.dim][i];
-       Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]);
-       Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]);
-       rightBounds.extend(BBox3fa(bounds_lower,bounds_upper));
-     }
-     new (&info) SplitInfo(leftCount,leftBounds,rightCount,rightBounds);
-   }
-
-   /*! gets the number of primitives left of the split; returns size_t(-1) for a split with dim == -1 */
-   __forceinline size_t getLeftCount(const BinMapping<16>& mapping, const Split& split) const
-   {
-     if (unlikely(split.dim == -1)) return -1;
-
-     size_t leftCount = 0;
-     for (size_t i = 0; i < (size_t)split.pos; i++) {
-       leftCount += count[split.dim][i];
-     }
-     return leftCount;
-   }
-
-   /*! gets the number of primitives right of the split; returns size_t(-1) for a split with dim == -1 */
-   __forceinline size_t getRightCount(const BinMapping<16>& mapping, const Split& split) const
-   {
-     if (unlikely(split.dim == -1)) return -1;
-
-     size_t rightCount = 0;
-     for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
-       rightCount += count[split.dim][i];
-     }
-     return rightCount;
-   }
-
- private:
-   Vec3vf16 lower[3]; //!< per-dimension lower bounds of the bins, one lane per bin
-   Vec3vf16 upper[3]; //!< per-dimension upper bounds of the bins, one lane per bin
-   vuint16 count[3];  //!< per-dimension primitive counts of the bins, one lane per bin
- };
-#endif
- }
-
- /*! Bins prims[begin,end) into binner. Ranges smaller than parallelThreshold
-  *  are binned serially; larger ones are binned per block through
-  *  parallel_reduce and the partial results are merged. */
- template<typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
- {
-   if (likely(end-begin < parallelThreshold)) {
-     binner.bin(prims,begin,end,mapping);
-     return;
-   }
-
-   /* bins one sub-range into a fresh binner */
-   auto binRange = [&](const range<size_t>& r) -> BinInfoT {
-     BinInfoT partial(empty);
-     partial.bin(prims + r.begin(), r.size(), mapping);
-     return partial;
-   };
-
-   /* combines two partial results */
-   auto mergeBins = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
-     BinInfoT merged = b0;
-     merged.merge(b1, mapping.size());
-     return merged;
-   };
-
-   binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBins);
- }
-
- /*! Variant of bin_parallel that forwards binBoundsAndCenter to every
-  *  BinInfoT::bin call. Ranges smaller than parallelThreshold are binned
-  *  serially; larger ones go through parallel_reduce. */
- template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
-   if (likely(end-begin < parallelThreshold)) {
-     binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
-     return;
-   }
-
-   /* bins one sub-range into a fresh binner */
-   auto binRange = [&](const range<size_t>& r) -> BinInfoT {
-     BinInfoT partial(empty);
-     partial.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter);
-     return partial;
-   };
-
-   /* combines two partial results */
-   auto mergeBins = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
-     BinInfoT merged = b0;
-     merged.merge(b1, mapping.size());
-     return merged;
-   };
-
-   binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBins);
- }
-
- /*! Bins prims[begin,end) into binner; the template parameter selects at
-  *  compile time between a single serial bin() call and a blocked
-  *  parallel_reduce. */
- template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
- {
-   if (parallel)
-   {
-     /* bins one sub-range into a fresh binner */
-     auto binRange = [&](const range<size_t>& r) -> BinInfoT {
-       BinInfoT partial(empty);
-       partial.bin(prims + r.begin(), r.size(), mapping);
-       return partial;
-     };
-
-     /* combines two partial results */
-     auto mergeBins = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
-       BinInfoT merged = b0;
-       merged.merge(b1, mapping.size());
-       return merged;
-     };
-
-     binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBins);
-   }
-   else
-   {
-     binner.bin(prims,begin,end,mapping);
-   }
- }
-
- /*! Variant of bin_serial_or_parallel that forwards binBoundsAndCenter to
-  *  every BinInfoT::bin call; the template parameter selects between the
-  *  serial and the parallel_reduce path at compile time. */
- template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
-   if (parallel)
-   {
-     /* bins one sub-range into a fresh binner */
-     auto binRange = [&](const range<size_t>& r) -> BinInfoT {
-       BinInfoT partial(empty);
-       partial.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter);
-       return partial;
-     };
-
-     /* combines two partial results */
-     auto mergeBins = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
-       BinInfoT merged = b0;
-       merged.merge(b1, mapping.size());
-       return merged;
-     };
-
-     binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBins);
-   }
-   else
-   {
-     binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
-   }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h
deleted file mode 100644
index a4c272f015..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Index range [begin,end) combined with the bounds inherited from CentGeomBBox3fa. */
- struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
- {
-   /*! leaves bounds and range default constructed */
-   __forceinline PrimInfoRange () {
-   }
-
-   /*! takes bounds and [begin,end) from a PrimInfo */
-   __forceinline PrimInfoRange(const PrimInfo& pinfo)
-     : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
-
-   /*! empty bounds with the range [0,0) */
-   __forceinline PrimInfoRange(EmptyTy)
-     : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
-
-   /*! explicit range and bounds */
-   __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
-     : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
-
-   /*! expectedApproxHalfArea of the geometry bounds times the number of primitives */
-   __forceinline float leafSAH() const {
-     const float numPrims = float(size());
-     return expectedApproxHalfArea(geomBounds)*numPrims;
-   }
-
-   /*! same as leafSAH(), but counting blocks of 2^block_shift primitives (rounded up) */
-   __forceinline float leafSAH(size_t block_shift) const {
-     const size_t blockSize = size_t(1)<<block_shift;
-     const size_t numBlocks = (size()+blockSize-1) >> block_shift;
-     return expectedApproxHalfArea(geomBounds)*float(numBlocks);
-   }
- };
-
- /*! Object binning on an array of PrimRef: find() selects the best binned
-  *  split of a range and split() partitions the array accordingly. */
- template<typename PrimRef, size_t BINS>
- struct HeuristicArrayBinningSAH
- {
-   typedef BinSplit<BINS> Split;
-   typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
-   typedef range<size_t> Set;
-
-#if defined(__AVX512ER__) // KNL
-   static const size_t PARALLEL_THRESHOLD = 4*768;
-   static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
-   static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 768;
-#else
-   static const size_t PARALLEL_THRESHOLD = 3 * 1024;
-   static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
-   static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#endif
-
-   /*! creates a heuristic without a primitive array */
-   __forceinline HeuristicArrayBinningSAH ()
-     : prims(nullptr) {}
-
-   /*! remembers the primitive array */
-   __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
-     : prims(prims) {}
-
-   /*! finds the best split; ranges of at least PARALLEL_THRESHOLD primitives are binned in parallel */
-   __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
-   {
-     const bool belowThreshold = pinfo.size() < PARALLEL_THRESHOLD;
-     if (likely(belowThreshold))
-       return find_template<false>(pinfo,logBlockSize);
-
-     return find_template<true>(pinfo,logBlockSize);
-   }
-
-   /*! bins the range (serially or in parallel) and returns the best split found by the binner */
-   template<bool parallel>
-   __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
-   {
-     Binner binner(empty);
-     const BinMapping<BINS> mapping(pinfo);
-     bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
-     const Split bestSplit = binner.best(mapping,logBlockSize);
-     return bestSplit;
-   }
-
-   /*! array partitioning; ranges of at least PARALLEL_THRESHOLD primitives are partitioned in parallel */
-   __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
-   {
-     const bool belowThreshold = pinfo.size() < PARALLEL_THRESHOLD;
-     if (likely(belowThreshold))
-       split_template<false>(split,pinfo,linfo,rinfo);
-     else
-       split_template<true>(split,pinfo,linfo,rinfo);
-   }
-
-   /*! partitions the range by the split and writes the two halves, with their bounds, to lset and rset */
-   template<bool parallel>
-   __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
-   {
-     /* invalid split: sort the range and cut it in the middle */
-     if (!split.valid()) {
-       deterministic_order(set);
-       splitFallback(set,lset,rset);
-       return;
-     }
-
-     const size_t begin = set.begin();
-     const size_t end = set.end();
-     CentGeomBBox3fa leftBounds(empty);
-     CentGeomBBox3fa rightBounds(empty);
-
-     const unsigned int splitPos = split.pos;
-     const unsigned int splitDim = split.dim;
-     const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-     const typename Binner::vint vSplitPos(splitPos);
-     const typename Binner::vbool vSplitMask(splitDimMask);
-
-     /* predicate deciding whether a primitive goes to the left side */
-     auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-     /* grows the bounds of one side by a primitive */
-     auto extendBounds = [] (CentGeomBBox3fa& bounds,const PrimRef& ref) { bounds.extend_center2(ref); };
-     /* merges the bounds computed for two blocks */
-     auto mergeBounds = [] (CentGeomBBox3fa& bounds0,const CentGeomBBox3fa& bounds1) { bounds0.merge(bounds1); };
-
-     size_t center = 0;
-     if (parallel)
-       center = parallel_partitioning(
-         prims,begin,end,EmptyTy(),leftBounds,rightBounds,isLeft,
-         extendBounds,
-         mergeBounds,
-         PARALLEL_PARTITION_BLOCK_SIZE);
-     else
-       center = serial_partitioning(prims,begin,end,leftBounds,rightBounds,isLeft,
-                                    extendBounds);
-
-     new (&lset) PrimInfoRange(begin,center,leftBounds);
-     new (&rset) PrimInfoRange(center,end,rightBounds);
-     assert(area(lset.geomBounds) >= 0.0f);
-     assert(area(rset.geomBounds) >= 0.0f);
-   }
-
-   /*! sorts the primitives of the range */
-   void deterministic_order(const PrimInfoRange& pinfo)
-   {
-     /* required as parallel partition destroys original primitive order */
-     PrimRef* const first = &prims[pinfo.begin()];
-     PrimRef* const last = &prims[pinfo.end()];
-     std::sort(first,last);
-   }
-
-   /*! splits the range in the middle and computes the bounds of both halves */
-   void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
-   {
-     const size_t begin = pinfo.begin();
-     const size_t end = pinfo.end();
-     const size_t center = (begin + end)/2;
-
-     /* left half: [begin,center) */
-     CentGeomBBox3fa leftBounds(empty);
-     for (size_t k=begin; k<center; k++)
-       leftBounds.extend_center2(prims[k]);
-     new (&linfo) PrimInfoRange(begin,center,leftBounds);
-
-     /* right half: [center,end) */
-     CentGeomBBox3fa rightBounds(empty);
-     for (size_t k=center; k<end; k++)
-       rightBounds.extend_center2(prims[k]);
-     new (&rinfo) PrimInfoRange(center,end,rightBounds);
-   }
-
-   /*! moves the primitives sharing the geomID of the first primitive of the range to the left side */
-   void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
-   {
-     assert(range.size() > 1);
-     const size_t begin = range.begin();
-     const size_t end = range.end();
-
-     CentGeomBBox3fa leftBounds(empty);
-     CentGeomBBox3fa rightBounds(empty);
-     unsigned int geomID = prims[begin].geomID();
-     auto sameGeometry = [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; };
-     auto extendBounds = [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); };
-     size_t center = serial_partitioning(prims,begin,end,leftBounds,rightBounds,
-                                         sameGeometry,
-                                         extendBounds);
-
-     new (&linfo) PrimInfoRange(begin,center,leftBounds);
-     new (&rinfo) PrimInfoRange(center,end,rightBounds);
-   }
-
- private:
-   PrimRef* const prims; //!< primitive array this heuristic operates on
- };
-
- /*! Object binning for PrimRefMB sets: find() selects the best binned
-  *  split of a SetMB and split() partitions its primitive array. */
- template<typename PrimRefMB, size_t BINS>
- struct HeuristicArrayBinningMB
- {
-   typedef BinSplit<BINS> Split;
-   typedef typename PrimRefMB::BBox BBox;
-   typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
-   static const size_t PARALLEL_THRESHOLD = 3 * 1024;
-   static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
-   static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
-   /*! finds the best split; its SAH is scaled by the size of the set's time range */
-   const Split find(const SetMB& set, const size_t logBlockSize)
-   {
-     ObjectBinner binner(empty);
-     const BinMapping<BINS> mapping(set.size(),set.centBounds);
-     bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
-
-     Split bestSplit = binner.best(mapping,logBlockSize);
-     bestSplit.sah *= set.time_range.size();
-     if (!bestSplit.valid()) {
-       bestSplit.data = Split::SPLIT_FALLBACK; // use fallback split
-     }
-     return bestSplit;
-   }
-
-   /*! array partitioning: writes the two sides, with their accumulated PrimInfoMB, to lset and rset */
-   __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
-   {
-     const size_t begin = set.begin();
-     const size_t end = set.end();
-     PrimInfoMB leftInfo = empty;
-     PrimInfoMB rightInfo = empty;
-     const vint4 vSplitPos(split.pos);
-     const vbool4 vSplitMask(1 << split.dim);
-
-     /* a primitive goes left when its bin in the split dimension lies below the split position */
-     auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
-     /* adds a primitive to the info of one side */
-     auto addPrim = [] (PrimInfoMB& info, const PrimRefMB& ref) { info.add_primref(ref); };
-     /* merges the infos computed for two blocks */
-     auto mergeInfo = [] (PrimInfoMB& info0,const PrimInfoMB& info1) { info0.merge(info1); };
-
-     size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),leftInfo,rightInfo,isLeft,addPrim,mergeInfo,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
-     new (&lset) SetMB(leftInfo, set.prims,range<size_t>(begin,center),set.time_range);
-     new (&rset) SetMB(rightInfo,set.prims,range<size_t>(center,end ),set.time_range);
-   }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h
deleted file mode 100644
index 1370244586..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h
+++ /dev/null
@@ -1,302 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Object binning in a caller-supplied coordinate space: the bounds used
-  *  for binning are queried from the scene geometry via vbounds(space,primID)
-  *  instead of being read from the PrimRef. */
- template<typename PrimRef, size_t BINS>
- struct UnalignedHeuristicArrayBinningSAH
- {
-   typedef BinSplit<BINS> Split;
-   typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
-   typedef range<size_t> Set;
-
-   /* named thresholds, replacing the literals formerly repeated in find/split */
-   static const size_t PARALLEL_THRESHOLD = 10000;
-   static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
-   static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
-   __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
-     : scene(nullptr), prims(nullptr) {}
-
-   /*! remember scene and prim array */
-   __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
-     : scene(scene), prims(prims) {}
-
-   /*! Computes the space used for binning the set: the transposed frame of
-    *  the direction of the primitive with the smallest ID64 that has a
-    *  direction of squared length above 1E-18; falls back to the axis
-    *  (0,0,1) when there is none. */
-   const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
-   {
-     Vec3fa axis(0,0,1);
-     uint64_t bestGeomPrimID = -1;
-
-     /*! find curve with minimum ID that defines valid direction */
-     for (size_t i=set.begin(); i<set.end(); i++)
-     {
-       const unsigned int geomID = prims[i].geomID();
-       const unsigned int primID = prims[i].primID();
-       const uint64_t geomprimID = prims[i].ID64();
-       if (geomprimID >= bestGeomPrimID) continue;
-       const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
-       if (sqr_length(axis1) > 1E-18f) {
-         axis = normalize(axis1);
-         bestGeomPrimID = geomprimID;
-       }
-     }
-     return frame(axis).transposed();
-   }
-
-   /*! computes the PrimInfo of the set from the bounds of its primitives in the given space */
-   const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
-   {
-     auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
-     {
-       CentGeomBBox3fa bounds(empty);
-       for (size_t i=r.begin(); i<r.end(); i++) {
-         Geometry* mesh = scene->get(prims[i].geomID());
-         bounds.extend(mesh->vbounds(space,prims[i].primID()));
-       }
-       return bounds;
-     };
-
-     const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
-                                                    CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
-
-     return PrimInfo(set.begin(),set.end(),bounds);
-   }
-
-   /*! functor handed to the binner: evaluates primitive bounds in the given space */
-   struct BinBoundsAndCenter
-   {
-     __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
-       : scene(scene), space(space) {}
-
-     /*! returns center for binning */
-     __forceinline Vec3fa binCenter(const PrimRef& ref) const
-     {
-       Geometry* mesh = (Geometry*) scene->get(ref.geomID());
-       BBox3fa bounds = mesh->vbounds(space,ref.primID());
-       return embree::center2(bounds);
-     }
-
-     /*! returns bounds and centroid used for binning */
-     __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
-     {
-       Geometry* mesh = (Geometry*) scene->get(ref.geomID());
-       BBox3fa bounds = mesh->vbounds(space,ref.primID());
-       bounds_o = bounds;
-       center_o = embree::center2(bounds);
-     }
-
-   private:
-     Scene* scene;
-     const LinearSpace3fa space;
-   };
-
-   /*! finds the best split; ranges of at least PARALLEL_THRESHOLD primitives are binned in parallel */
-   __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
-   {
-     if (likely(pinfo.size() < PARALLEL_THRESHOLD))
-       return find_template<false>(pinfo,logBlockSize,space);
-     else
-       return find_template<true>(pinfo,logBlockSize,space);
-   }
-
-   /*! finds the best split */
-   template<bool parallel>
-   const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
-   {
-     Binner binner(empty);
-     const BinMapping<BINS> mapping(set);
-     BinBoundsAndCenter binBoundsAndCenter(scene,space);
-     bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,mapping,binBoundsAndCenter);
-     return binner.best(mapping,logBlockSize);
-   }
-
-   /*! array partitioning; ranges of at least PARALLEL_THRESHOLD primitives are partitioned in parallel */
-   __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
-   {
-     if (likely(set.size() < PARALLEL_THRESHOLD))
-       split_template<false>(split,space,set,lset,rset);
-     else
-       split_template<true>(split,space,set,lset,rset);
-   }
-
-   /*! array partitioning */
-   template<bool parallel>
-   __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
-   {
-     /* invalid split: sort the range and cut it in the middle */
-     if (!split.valid()) {
-       deterministic_order(set);
-       return splitFallback(set,lset,rset);
-     }
-
-     const size_t begin = set.begin();
-     const size_t end = set.end();
-     CentGeomBBox3fa local_left(empty);
-     CentGeomBBox3fa local_right(empty);
-     const int splitPos = split.pos;
-     const int splitDim = split.dim;
-     BinBoundsAndCenter binBoundsAndCenter(scene,space);
-
-     /* a primitive goes left when its bin in the split dimension lies below the split position */
-     auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; };
-     auto extendBounds = [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); };
-     auto mergeBounds = [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); };
-
-     /* honor the template parameter instead of re-testing the set size;
-        split() above already selects it with the same threshold */
-     size_t center = 0;
-     if (!parallel)
-       center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,extendBounds);
-     else
-       center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,isLeft,extendBounds,mergeBounds,
-                                      PARALLEL_PARTITION_BLOCK_SIZE);
-
-     new (&lset) PrimInfoRange(begin,center,local_left);
-     new (&rset) PrimInfoRange(center,end,local_right);
-     assert(area(lset.geomBounds) >= 0.0f);
-     assert(area(rset.geomBounds) >= 0.0f);
-   }
-
-   /*! sorts the primitives of the set */
-   void deterministic_order(const range<size_t>& set)
-   {
-     /* required as parallel partition destroys original primitive order */
-     std::sort(&prims[set.begin()],&prims[set.end()]);
-   }
-
-   /*! splits the set in the middle and computes the bounds of both halves */
-   void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
-   {
-     const size_t begin = set.begin();
-     const size_t end = set.end();
-     const size_t center = (begin + end)/2;
-
-     CentGeomBBox3fa left(empty);
-     for (size_t i=begin; i<center; i++)
-       left.extend_center2(prims[i]);
-     new (&lset) PrimInfoRange(begin,center,left);
-
-     CentGeomBBox3fa right(empty);
-     for (size_t i=center; i<end; i++)
-       right.extend_center2(prims[i]);
-     new (&rset) PrimInfoRange(center,end,right);
-   }
-
- private:
-   Scene* const scene;   //!< scene used to look up geometries by geomID
-   PrimRef* const prims; //!< primitive array this heuristic operates on
- };
-
- /*! Performs standard object binning for PrimRefMB sets in a caller-supplied
-  *  coordinate space: the bounds used for binning are queried from the scene
-  *  geometry via vlinearBounds(space,primID,time_range). */
- template<typename PrimRefMB, size_t BINS>
- struct UnalignedHeuristicArrayBinningMB
- {
- typedef BinSplit<BINS> Split;
- typedef typename PrimRefMB::BBox BBox;
- typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
-
- static const size_t PARALLEL_THRESHOLD = 3 * 1024; // passed to bin_parallel and parallel_partitioning below
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; // block size passed to bin_parallel in find()
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; // block size passed to parallel_partitioning in split()
-
- UnalignedHeuristicArrayBinningMB(Scene* scene) // remembers the scene used by find() and split()
- : scene(scene) {}
-
- const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set) // NOTE: the 'scene' parameter shadows the member of the same name
- {
- Vec3fa axis0(0,0,1); // fallback axis, kept when no primitive yields a valid direction
- uint64_t bestGeomPrimID = -1; // largest uint64_t value, so that any ID64 compares smaller
-
- /*! find curve with minimum ID that defines valid direction */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const PrimRefMB& prim = (*set.prims)[i];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const uint64_t geomprimID = prim.ID64();
- if (geomprimID >= bestGeomPrimID) continue;
-
- const Geometry* mesh = scene->get(geomID);
- const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
- if (tbounds.size() == 0) continue; // no time segment for the set's time range
-
- const size_t t = (tbounds.begin()+tbounds.end())/2; // middle of the time segment range
- const Vec3fa axis1 = mesh->computeDirection(primID,t);
- if (sqr_length(axis1) > 1E-18f) { // accept only directions that are not (nearly) zero length
- axis0 = normalize(axis1);
- bestGeomPrimID = geomprimID;
- }
- }
-
- return frame(axis0).transposed();
- }
-
- struct BinBoundsAndCenter // functor handed to the binner: evaluates primitive bounds in 'space' over 'time_range'
- {
- __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
- : scene(scene), time_range(time_range), space(space) {}
-
- /*! returns center for binning: center2 of the linear bounds interpolated at 0.5 */
- template<typename PrimRef>
- __forceinline Vec3fa binCenter(const PrimRef& ref) const
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- return center2(lbounds.interpolate(0.5f));
- }
-
- /*! returns bounds and centroid used for binning; the bounds are the linear bounds interpolated at 0.5 */
- __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- bounds_o = lbounds.interpolate(0.5f);
- center_o = center2(bounds_o);
- }
-
- /*! returns bounds and centroid used for binning; this overload returns the linear bounds themselves */
- __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- bounds_o = lbounds;
- center_o = center2(lbounds.interpolate(0.5f));
- }
-
- private:
- Scene* scene;
- BBox1f time_range;
- const LinearSpace3fa space;
- };
-
- /*! finds the best split; its SAH is scaled by the size of the set's time range */
- const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
- {
- BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
- ObjectBinner binner(empty);
- const BinMapping<BINS> mapping(set.size(),set.centBounds);
- bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
- Split osplit = binner.best(mapping,logBlockSize);
- osplit.sah *= set.time_range.size();
- if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
- return osplit;
- }
-
- /*! array partitioning: writes the two sides, with their accumulated PrimInfoMB, to lset and rset */
- __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfoMB left = empty;
- PrimInfoMB right = empty;
- const vint4 vSplitPos(split.pos);
- const vbool4 vSplitMask(1 << split.dim); // mask with only the bit of the split dimension set
- auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
- auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
- auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
- size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
- new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- private:
- Scene* scene;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h
deleted file mode 100644
index 21f18c0208..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h
+++ /dev/null
@@ -1,443 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-// TODO:
-// - adjust parallel build thresholds
-// - openNodesBasedOnExtend should consider max extended size
-
-#pragma once
-
-#include "heuristic_binning.h"
-#include "heuristic_spatial.h"
-
-/* stop opening if all bref.geomIDs are the same */
-#define EQUAL_GEOMID_STOP_CRITERIA 1
-
-/* 10% spatial extend threshold */
-#define MAX_EXTEND_THRESHOLD 0.1f
-
-/* maximum is 8 children */
-#define MAX_OPENED_CHILD_NODES 8
-
-/* open until all build refs are below threshold size in one step */
-#define USE_LOOP_OPENING 0
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
- struct HeuristicArrayOpenMergeSAH
- {
- typedef BinSplit<OBJECT_BINS> Split;
- typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
-
- static const size_t PARALLEL_THRESHOLD = 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- static const size_t MOVE_STEP_SIZE = 64;
- static const size_t CREATE_SPLITS_STEP_SIZE = 128;
-
- __forceinline HeuristicArrayOpenMergeSAH () // default construction: only prims0 is set (to nullptr)
- : prims0(nullptr) {} // NOTE(review): the reference member nodeOpenerFunc and max_open_size are not initialized here -- confirm this constructor is never instantiated
-
- /*! remember prim array, the node opener and the maximum open size.
-  *  nodeOpenerFunc is stored by reference, so the callable has to outlive this object.
-  *  max_open_size is asserted to be at most MAX_OPENED_CHILD_NODES; find() disables
-  *  opening once fewer than max_open_size-1 extended slots are left. */
- __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
- : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
- {
- assert(max_open_size <= MAX_OPENED_CHILD_NODES);
- }
-
- /*! Predicate selecting the build references to open: a reference qualifies when its node
-  *  is not a leaf and its extent along the axis chosen by maxDim() exceeds
-  *  MAX_EXTEND_THRESHOLD times the extent of the whole set along that axis. */
- struct OpenHeuristic
- {
-   __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
-   {
-     const Vec3fa extent = pinfo.geomBounds.size();
-     dim = maxDim(extent);
-     assert(extent[dim] > 0.0f);
-     inv_max_extend = 1.0f / extent[dim];
-   }
-
-   __forceinline bool operator () ( PrimRef& prim ) const
-   {
-     /* leaves can never be opened */
-     if (prim.node.isLeaf())
-       return false;
-
-     return prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
-   }
-
- private:
-   size_t dim;           //!< axis the extents are compared along
-   float inv_max_extend; //!< reciprocal extent of the set along dim
- };
-
- /*! compute extended ranges: distributes the extended range of the parent set between
-  *  both children in proportion lweight : rweight (left share rounded down) */
- __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
- {
-   assert(set.ext_range_size() > 0);
-   const size_t total_ext = set.ext_range_size();
-   const float left_share = (float)lweight / (lweight + rweight);
-   const size_t left_ext = min((size_t)(floorf(left_share * total_ext)),total_ext);
-   const size_t right_ext = total_ext - left_ext;
-   lset.set_ext_range(lset.end() + left_ext);
-   rset.set_ext_range(rset.end() + right_ext);
- }
-
- /*! move ranges: shifts the right child to the right by the size of the extended range of
-  *  the left child, so that the left extended range becomes free */
- __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
-   const size_t shift = lset.ext_range_size();
-   const size_t right_size = rset.size();
-
-   /* nothing to move when the left child has no extended range */
-   if (shift == 0)
-     return;
-
-   /* shift smaller than the right range: only the first 'shift' elements of the right range
-      are relocated to its end. Otherwise source and destination do not overlap and the
-      entire right range is moved by 'shift'. */
-   const bool partial = shift < right_size;
-   const size_t move_end = partial ? rset.begin()+shift : rset.end();
-   const size_t offset = partial ? right_size : shift;
-
-   parallel_for( rset.begin(), move_end, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
-     for (size_t i=r.begin(); i<r.end(); i++)
-       prims0[i+offset] = prims0[i];
-   });
-
-   /* update right range */
-   assert(rset.ext_end() + shift == set.ext_end());
-   rset.move_right(shift);
- }
-
- /* estimates the extra space required when opening, and checks if all primitives are from same geometry;
-    returns the pair (estimated number of extra elements, all geomIDs equal) */
- __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
- {
-   typedef std::pair<size_t,bool> Props;
-
-   const OpenHeuristic shouldOpen(set);
-   const unsigned int firstGeomID = prims0[set.begin()].geomID();
-
-   /* gathers both properties for one block of primitives */
-   auto scan = [&] (const range<size_t>& r) -> Props {
-     Props local(0,true);
-     for (size_t i=r.begin(); i<r.end(); i++) {
-       local.second &= prims0[i].geomID() == firstGeomID;
-       if (shouldOpen(prims0[i]))
-         local.first += prims0[i].node.getN()-1; // coarse approximation
-     }
-     return local;
-   };
-
-   /* sums the estimates and and-combines the geomID flags */
-   auto combine = [&] (const Props& b0, const Props& b1) -> Props {
-     return Props(b0.first+b1.first,b0.second && b1.second);
-   };
-
-   return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,Props(0,true),scan,combine);
- }
-
- // FIXME: should consider maximum available extended size
- /*! Opens, in a single parallel pass, every build reference of the set that is selected by
-  *  the OpenHeuristic. The first child returned by nodeOpenerFunc replaces the opened
-  *  reference in place, all further children are appended behind set.end(). Afterwards the
-  *  centroid bounds of the set are extended and set._end is advanced by the number of
-  *  appended references. The caller has to guarantee that the extended range is large
-  *  enough (only checked by assertions).
-  *
-  *  The former sequential variant was guarded by "if (false && ...)" and therefore
-  *  unreachable; it has been removed. */
- __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
- {
-   const OpenHeuristic heuristic(set);
-   const size_t ext_range_start = set.end();
-
-   /* number of extended slots handed out so far, shared between all tasks */
-   std::atomic<size_t> ext_elements;
-   ext_elements.store(0);
-
-   PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
-     PrimInfo info(empty);
-     for (size_t i=r.begin(); i<r.end(); i++)
-       if (heuristic(prims0[i]))
-       {
-         PrimRef tmp[MAX_OPENED_CHILD_NODES];
-         const size_t n = nodeOpenerFunc(prims0[i],tmp);
-
-         /* reserve n-1 consecutive slots of the extended range for this node */
-         const size_t ID = ext_elements.fetch_add(n-1);
-         assert(ID + n-1 <= set.ext_range_size());
-
-         for (size_t j=0; j<n; j++)
-           info.extend_center2(tmp[j]);
-
-         /* first child replaces the opened reference, the others go to the reserved slots */
-         prims0[i] = tmp[0];
-         for (size_t j=1; j<n; j++)
-           prims0[ext_range_start+ID+j-1] = tmp[j];
-       }
-     return info;
-   }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
-
-   set.centBounds.extend(info.centBounds);
-   assert(ext_elements.load() <= set.ext_range_size());
-   set._end += ext_elements.load();
- }
-
- /*! Sequentially opens build references in repeated passes. A pass is only started while
-  *  the estimated number of new elements fits into the remaining extended range; the loop
-  *  ends early once a pass leaves nothing more to open. */
- __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
- {
-   const OpenHeuristic heuristic(set);
-   size_t estimate = est_new_elements;
-
-   while (estimate <= set.ext_range_size())
-   {
-     estimate = 0;
-     size_t added = 0;
-     const size_t append_pos = set.end();
-
-     for (size_t i=set.begin(); i<set.end(); i++)
-     {
-       if (!heuristic(prims0[i]))
-         continue;
-
-       PrimRef children[MAX_OPENED_CHILD_NODES];
-       const size_t n = nodeOpenerFunc(prims0[i],children);
-       assert(added + n-1 <= set.ext_range_size());
-       for (size_t j=0; j<n; j++)
-         set.extend_center2(children[j]);
-
-       /* first child replaces the opened reference, the others are appended */
-       prims0[i] = children[0];
-       for (size_t j=1; j<n; j++)
-         prims0[append_pos+added+j-1] = children[j];
-       added += n-1;
-
-       /* estimate the space the next pass would need */
-       for (size_t j=0; j<n; j++)
-         if (heuristic(children[j]))
-           estimate += children[j].node.getN()-1; // coarse approximation
-     }
-     assert( added <= set.ext_range_size());
-     set._end += added;
-
-     for (size_t i=set.begin(); i<set.end(); i++)
-       assert(prims0[i].numPrimitives() > 0);
-
-     if (unlikely(estimate == 0)) break;
-   }
- }
-
- /*! Finds the best object split of the set. When the set owns an extended range, nodes are
-  *  opened first; opening is disabled by shrinking the extended range to set.end(). This
-  *  happens for small sets without any overlapping pair of references, for sets whose
-  *  references all share one geomID (if EQUAL_GEOMID_STOP_CRITERIA is 1), and when fewer
-  *  than max_open_size-1 extended slots remain after opening. */
- __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
- {
-   /* single element */
-   if (set.size() <= 1)
-     return Split();
-
-   /* disable opening if there is no overlap */
-   const size_t D = 4;
-   if (unlikely(set.has_ext_range() && set.size() <= D))
-   {
-     /* test every unordered pair exactly once: the inner loop starts at j+1 so that a
-        reference is never tested against itself, and the search stops at the first
-        overlapping pair */
-     bool disjoint = true;
-     for (size_t j=set.begin(); disjoint && j<set.end()-1; j++) {
-       for (size_t i=j+1; i<set.end(); i++) {
-         if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
-           disjoint = false; break;
-         }
-       }
-     }
-     if (disjoint) set.set_ext_range(set.end()); /* disables opening */
-   }
-
-   std::pair<size_t,bool> p(0,false);
-
-   /* disable opening when all primitives are from same geometry */
-   if (unlikely(set.has_ext_range()))
-   {
-     p = getProperties(set);
-#if EQUAL_GEOMID_STOP_CRITERIA == 1
-     if (p.second) set.set_ext_range(set.end()); /* disable opening */
-#endif
-   }
-
-   /* open nodes when we have sufficient space available */
-   if (unlikely(set.has_ext_range()))
-   {
-#if USE_LOOP_OPENING == 1
-     openNodesBasedOnExtendLoop(set,p.first);
-#else
-     if (p.first <= set.ext_range_size())
-       openNodesBasedOnExtend(set);
-#endif
-
-     /* disable opening when insufficient space for opening a node is available */
-     if (set.ext_range_size() < max_open_size-1)
-       set.set_ext_range(set.end()); /* disable opening */
-   }
-
-   /* find best split */
-   return object_find(set,logBlockSize);
- }
-
-
- /*! finds the best object split, on the calling thread for sets smaller than
-  *  PARALLEL_THRESHOLD and in parallel otherwise */
- __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
- {
-   const bool is_small = set.size() < PARALLEL_THRESHOLD;
-   if (is_small)
-     return sequential_object_find(set,logBlockSize);
-
-   return parallel_object_find(set,logBlockSize);
- }
-
- /*! finds the best object split by binning all primitives of the set on the calling thread */
- __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
-   Binner bins(empty);
-   const BinMapping<OBJECT_BINS> binMapping(set.centBounds);
-
-   bins.bin(prims0,set.begin(),set.end(),binMapping);
-
-   return bins.best(binMapping,logBlockSize);
- }
-
- /*! finds the best split: blocks of primitives are binned in parallel and the per-block
-  *  binners are merged before the best split is selected */
- __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
-   Binner binner(empty);
-   const BinMapping<OBJECT_BINS> mapping(set.centBounds);
-   const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
-
-   /* bins one block of primitives into a fresh binner */
-   auto binBlock = [&] (const range<size_t>& r) -> Binner {
-     Binner local(empty);
-     local.bin(prims0+r.begin(),r.size(),_mapping);
-     return local;
-   };
-
-   /* merges two partial binners into a new one */
-   auto mergeBins = [&] (const Binner& b0, const Binner& b1) -> Binner {
-     Binner merged = b0;
-     merged.merge(b1,_mapping.size());
-     return merged;
-   };
-
-   binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,binBlock,mergeBins);
-   return binner.best(mapping,logBlockSize);
- }
-
- /*! array partitioning: partitions the primitives of set_i according to the split and
-  *  fills lset/rset; an existing extended range is distributed between both children */
- __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
-   PrimInfoExtRange set = set_i;
-
-   /* invalid split: sort the primitives and split the range in the middle */
-   if (unlikely(!split.valid())) {
-     deterministic_order(set);
-     splitFallback(set,lset,rset);
-     return;
-   }
-
-   /* object split, returns the number of primitives on each side */
-   std::pair<size_t,size_t> ext_weights(0,0);
-   const bool sequential = likely(set.size() < PARALLEL_THRESHOLD);
-   if (sequential) ext_weights = sequential_object_split(split,set,lset,rset);
-   else            ext_weights = parallel_object_split(split,set,lset,rset);
-
-   /* without an extended range we are done */
-   if (likely(!set.has_ext_range()))
-     return;
-
-   /* set extended child ranges and move right split range */
-   setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
-   moveExtentedRange(set,lset,rset);
- }
-
- /*! array partitioning on the calling thread; constructs both child sets (without extended
-  *  range) and returns the sizes of the left and right primitive info */
- std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
-   const size_t first = set.begin();
-   const size_t last = set.end();
-
-   const unsigned int pos = split.pos;
-   const unsigned int dim = split.dim;
-   const unsigned int dimMask = (unsigned int)1 << dim;
-   const vint4 vSplitPos(pos);
-   const vbool4 vSplitMask( (int)dimMask );
-
-   auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-   auto accumulate = [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); };
-
-   PrimInfo linfo(empty);
-   PrimInfo rinfo(empty);
-   size_t mid = serial_partitioning(prims0,first,last,linfo,rinfo,isLeft,accumulate);
-
-   new (&lset) PrimInfoExtRange(first,mid,mid,linfo);
-   new (&rset) PrimInfoExtRange(mid,last,last,rinfo);
-   assert(area(lset.geomBounds) >= 0.0f);
-   assert(area(rset.geomBounds) >= 0.0f);
-   return std::pair<size_t,size_t>(linfo.size(),rinfo.size());
- }
-
- /*! parallel array partitioning; constructs both child sets (without extended range) and
-  *  returns the sizes of the left and right primitive info */
- __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
-   const size_t first = set.begin();
-   const size_t last = set.end();
-
-   const unsigned int pos = split.pos;
-   const unsigned int dim = split.dim;
-   const unsigned int dimMask = (unsigned int)1 << dim;
-   const vint4 vSplitPos(pos);
-   const vbool4 vSplitMask( (int)dimMask );
-
-   auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-   auto accumulate = [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); };
-   auto combine = [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); };
-
-   PrimInfo linfo(empty);
-   PrimInfo rinfo(empty);
-   const size_t mid = parallel_partitioning(
-     prims0,first,last,EmptyTy(),linfo,rinfo,isLeft,
-     accumulate,combine,
-     PARALLEL_PARTITION_BLOCK_SIZE);
-
-   new (&lset) PrimInfoExtRange(first,mid,mid,linfo);
-   new (&rset) PrimInfoExtRange(mid,last,last,rinfo);
-   assert(area(lset.geomBounds) >= 0.0f);
-   assert(area(rset.geomBounds) >= 0.0f);
-
-   return std::pair<size_t,size_t>(linfo.size(),rinfo.size());
- }
-
- /*! sorts the primitives in [set.begin(), set.end()) with std::sort */
- void deterministic_order(const extended_range<size_t>& set)
- {
-   PrimRef* const first = prims0 + set.begin();
-   PrimRef* const last = prims0 + set.end();
-
-   /* required as parallel partition destroys original primitive order */
-   std::sort(first,last);
- }
-
- /*! fallback split: cuts the range in the middle, recomputes the primitive info of both
-  *  halves and distributes an existing extended range between them */
- __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
-   const size_t first = set.begin();
-   const size_t last = set.end();
-   const size_t mid = (first + last)/2;
-
-   /* gather the primitive info of both halves */
-   PrimInfo linfo(empty);
-   PrimInfo rinfo(empty);
-   for (size_t i=first; i<last; i++)
-     (i < mid ? linfo : rinfo).add_center2(prims0[i]);
-
-   const size_t lweight = linfo.end;
-   const size_t rweight = rinfo.end;
-
-   new (&lset) PrimInfoExtRange(first,mid,mid,linfo);
-   new (&rset) PrimInfoExtRange(mid,last,last,rinfo);
-
-   /* nothing more to do without an extended range */
-   if (!set.has_ext_range())
-     return;
-
-   setExtentedRanges(set,lset,rset,lweight,rweight);
-   moveExtentedRange(set,lset,rset);
- }
-
- private:
- PrimRef* const prims0;
- const NodeOpenerFunc& nodeOpenerFunc;
- size_t max_open_size;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h
deleted file mode 100644
index d8ca6cb92c..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h
+++ /dev/null
@@ -1,414 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "priminfo.h"
-
-namespace embree
-{
- static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
-
- namespace isa
- {
-
- /*! mapping into bins: linear function per dimension that maps a position inside the
-  *  geometry bounds to one of BINS bin IDs */
- template<size_t BINS>
- struct SpatialBinMapping
- {
- public:
-   __forceinline SpatialBinMapping() {}
-
-   /*! calculates the mapping; dimensions whose extent is not larger than eps get a scale
-    *  of zero and are reported as invalid */
-   __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
-   {
-     const vfloat4 lo = (vfloat4) pinfo.geomBounds.lower;
-     const vfloat4 hi = (vfloat4) pinfo.geomBounds.upper;
-     const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lo),abs(hi));
-     const vfloat4 extent = max(eps,(vfloat4) pinfo.geomBounds.size());
-     const vbool4 degenerate = hi-lo <= eps;
-     scale = select(degenerate,vfloat4(0.0f),vfloat4(BINS)/extent);
-     ofs = (vfloat4) pinfo.geomBounds.lower;
-     inv_scale = 1.0f / scale;
-   }
-
-   /*! slower but safe binning */
-   __forceinline vint4 bin(const Vec3fa& p) const
-   {
-     const vint4 raw = floori((vfloat4(p)-ofs)*scale);
-     return clamp(raw,vint4(0),vint4(BINS-1));
-   }
-
-   /*! bins the lower and the upper corner of a box, both clamped to the valid bin range */
-   __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
-   {
-#if defined(__AVX__)
-     const vfloat8 ofs8(ofs);
-     const vfloat8 scale8(scale);
-     const vint8 raw = floori((vfloat8::loadu(&b)-ofs8)*scale8);
-     const vint8 clamped = clamp(raw,vint8(zero),vint8(BINS-1));
-     return std::pair<vint4,vint4>(extract4<0>(clamped),extract4<1>(clamped));
-#else
-     const vint4 rawLower = floori((vfloat4(b.lower)-ofs)*scale);
-     const vint4 rawUpper = floori((vfloat4(b.upper)-ofs)*scale);
-     const vint4 binLower = clamp(rawLower,vint4(0),vint4(BINS-1));
-     const vint4 binUpper = clamp(rawUpper,vint4(0),vint4(BINS-1));
-     return std::pair<vint4,vint4>(binLower,binUpper);
-#endif
-   }
-
-
-   /*! calculates left spatial position of bin */
-   __forceinline float pos(const size_t bin, const size_t dim) const
-   {
-     const float binf = float(bin);
-     return madd(binf,inv_scale[dim],ofs[dim]);
-   }
-
-   /*! calculates left spatial position of bin */
-   template<size_t N>
-   __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const
-   {
-     const vfloat<N> step(inv_scale[dim]);
-     const vfloat<N> origin(ofs[dim]);
-     return madd(bin,step,origin);
-   }
-
-   /*! returns true if the mapping is invalid in some dimension */
-   __forceinline bool invalid(const size_t dim) const {
-     return scale[dim] == 0.0f;
-   }
-
- public:
-   vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
- };
-
- /*! stores all information required to perform some split: SAH cost, split dimension and
-  *  bin position, element counts of both sides, the factor for the extended range and the
-  *  bin mapping. A split with dim == -1 is invalid. */
- template<size_t BINS>
- struct SpatialBinSplit
- {
- /*! construct an invalid split by default (dim = -1, sah = inf; mapping is default constructed) */
- __forceinline SpatialBinSplit()
- : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
-
- /*! constructs specified split; left and right counts are set to -1 and factor to 1 */
- __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
-
- /*! constructs specified split with explicit element counts and extended range factor */
- __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
-
- /*! tests if this split is valid */
- __forceinline bool valid() const { return dim != -1; }
-
- /*! calculates surface area heuristic for performing the split (returns the stored sah) */
- __forceinline float splitSAH() const { return sah; }
-
- /*! stream output of all members except the mapping */
- friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
- return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
- }
-
- public:
- float sah; //!< SAH cost of the split
- int dim; //!< split dimension
- int pos; //!< split position
- int left; //!< number of elements on the left side
- int right; //!< number of elements on the right side
- float factor; //!< factor splitting the extended range
- SpatialBinMapping<BINS> mapping; //!< mapping into bins
- };
-
- /*! stores all binning information */
- template<size_t BINS, typename PrimRef>
- struct __aligned(64) SpatialBinInfo
- {
- SpatialBinInfo() { // default construction does not call clear()
- }
-
- __forceinline SpatialBinInfo(EmptyTy) { // construction from the empty tag starts with cleared bins
- clear();
- }
-
- /*! clears the bin info: empties the bounds of every bin in all three dimensions and
-  *  zeroes both counters */
- __forceinline void clear()
- {
-   for (size_t bin=0; bin<BINS; bin++)
-   {
-     for (size_t dim=3; dim>0; dim--)
-       bounds[bin][dim-1] = empty;
-
-     numEnd[bin] = 0;
-     numBegin[bin] = numEnd[bin];
-   }
- }
-
- /*! adds binning data for dimension dim: n primitives start in bin beginID and end in bin
-  *  endID, and the bounds of bin binID are extended by b */
- __forceinline void add(const size_t dim,
-                        const size_t beginID,
-                        const size_t endID,
-                        const size_t binID,
-                        const BBox3fa &b,
-                        const size_t n = 1)
- {
-   assert(beginID < BINS);
-   assert(endID < BINS);
-   assert(binID < BINS);
-
-   const unsigned int count = (unsigned int)n;
-   numBegin[beginID][dim] += count;
-   numEnd[endID][dim] += count;
-   bounds[binID][dim].extend(b);
- }
-
- /*! extends binning bounds: grows the bounds of bin binID in dimension dim by b without
-  *  touching the begin/end counters */
- __forceinline void extend(const size_t dim,
- const size_t binID,
- const BBox3fa &b)
- {
- assert(binID < BINS);
- bounds [binID][dim].extend(b);
- }
-
- /*! bins an array of triangles.
-  *  For every primitive and dimension the bins of the lower and upper bound are computed.
-  *  A primitive overlapping several bins is cut with splitPrimitive at each bin boundary;
-  *  every piece extends the bounds of its bin, and the primitive is counted once in
-  *  numBegin (first bin) and once in numEnd (last bin). Primitives whose top
-  *  RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS geomID bits equal 1 are not split but binned
-  *  by the center of their bounds. */
- template<typename SplitPrimitive>
- __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping<BINS>& mapping)
- {
- for (size_t i=0; i<N; i++)
- {
- const PrimRef prim = prims[i];
- unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); // value stored in the upper geomID bits
-
- if (unlikely(splits == 1))
- {
- const vint4 bin = mapping.bin(center(prim.bounds())); // single bin per dimension, chosen by the center
- for (size_t dim=0; dim<3; dim++)
- {
- assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
- numBegin[bin[dim]][dim]++;
- numEnd [bin[dim]][dim]++;
- bounds [bin[dim]][dim].extend(prim.bounds());
- }
- }
- else
- {
- const vint4 bin0 = mapping.bin(prim.bounds().lower);
- const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
- for (size_t dim=0; dim<3; dim++)
- {
- size_t bin;
- PrimRef rest = prim; // part of the primitive that still has to be binned
- size_t l = bin0[dim];
- size_t r = bin1[dim];
-
- // same bin optimization
- if (likely(l == r))
- {
- numBegin[l][dim]++;
- numEnd [l][dim]++;
- bounds [l][dim].extend(prim.bounds());
- continue;
- }
-
- for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++)
- {
- const float pos = mapping.pos(bin+1,dim); // right boundary of the current bin
-
- PrimRef left,right;
- splitPrimitive(rest,(int)dim,pos,left,right);
- if (unlikely(left.bounds().empty())) l++; // empty left piece: the primitive starts one bin later
- bounds[bin][dim].extend(left.bounds());
- rest = right;
- }
- if (unlikely(rest.bounds().empty())) r--; // empty remainder: the primitive ends one bin earlier
- numBegin[l][dim]++;
- numEnd [r][dim]++;
- bounds [bin][dim].extend(rest.bounds()); // bin equals bin1[dim] after the loop
- }
- }
- }
- }
-
- /*! bins a range of primitives inside an array; forwards [begin,end) to the pointer/count overload */
- template<typename SplitPrimitive>
- void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
- {
-   const PrimRef* const first = prims+begin;
-   const size_t count = end-begin;
-   bin(splitPrimitive,first,count,mapping);
- }
-
- /*! bins an array of primitives.
-  *  Works on the bounding boxes only: for a primitive overlapping several bins a splitter
-  *  is created with splitterFactory and used to cut the remaining box at each bin boundary.
-  *  Dimensions with an invalid mapping are skipped. */
- template<typename PrimitiveSplitterFactory>
- __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
- {
- for (size_t i=begin; i<end; i++)
- {
- const PrimRef &prim = source[i];
- const vint4 bin0 = mapping.bin(prim.bounds().lower);
- const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
- for (size_t dim=0; dim<3; dim++)
- {
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- size_t bin;
- size_t l = bin0[dim];
- size_t r = bin1[dim];
-
- // same bin optimization
- if (likely(l == r))
- {
- add(dim,l,l,l,prim.bounds());
- continue;
- }
- const size_t bin_start = bin0[dim];
- const size_t bin_end = bin1[dim];
- BBox3fa rest = prim.bounds(); // part of the bounds that still has to be binned
- const auto splitter = splitterFactory(prim);
- for (bin=bin_start; bin<bin_end; bin++)
- {
- const float pos = mapping.pos(bin+1,dim); // right boundary of the current bin
- BBox3fa left,right;
- splitter(rest,dim,pos,left,right);
- if (unlikely(left.empty())) l++; // empty left piece: the primitive starts one bin later
- extend(dim,bin,left);
- rest = right;
- }
- if (unlikely(rest.empty())) r--; // empty remainder: the primitive ends one bin earlier
- add(dim,l,r,bin,rest); // counts once; bin equals bin_end after the loop
- }
- }
- }
-
-
-
- /*! bins an array of primitives that are not split: a reference overlapping several bins
-  *  extends the bounds of every overlapped bin by its full bounds, and is counted exactly
-  *  once as starting in its first and ending in its last bin. prim.primID() is used as the
-  *  count (presumably the number of primitives of the sub-tree -- confirm with the caller).
-  *  Dimensions with an invalid mapping are skipped.
-  *
-  *  The previous version called add() once per overlapped bin, which multiplied the
-  *  begin/end counters by the number of overlapped bins and never extended the bounds of
-  *  the last bin; this made the counters disagree with bin2(). */
- __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
- {
-   for (size_t i=begin; i<end; i++)
-   {
-     const PrimRef &prim = source[i];
-     const vint4 bin0 = mapping.bin(prim.bounds().lower);
-     const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
-     for (size_t dim=0; dim<3; dim++)
-     {
-       if (unlikely(mapping.invalid(dim)))
-         continue;
-
-       const size_t l = bin0[dim];
-       const size_t r = bin1[dim];
-
-       const unsigned int n = prim.primID();
-
-       /* bins [l,r) only receive the bounds */
-       for (size_t bin=l; bin<r; bin++)
-         extend(dim,bin,prim.bounds());
-
-       /* the last bin receives the bounds too, and the counters are updated exactly once;
-          for l == r this is the same-bin case */
-       add(dim,l,r,r,prim.bounds(),n);
-     }
-   }
- }
-
- /*! merges in other binning information: adds the counters and unites the bounds bin by bin */
- void merge (const SpatialBinInfo& other)
- {
-   for (size_t bin=0; bin<BINS; bin++)
-   {
-     numBegin[bin] += other.numBegin[bin];
-     numEnd[bin] += other.numEnd[bin];
-     for (size_t dim=0; dim<3; dim++)
-       bounds[bin][dim].extend(other.bounds[bin][dim]);
-   }
- }
-
- /*! returns a new bin info holding the summed counters and united bounds of a and b */
- static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
- {
-   SpatialBinInfo merged(empty);
-   for (size_t bin=0; bin<BINS; bin++)
-   {
-     merged.numBegin[bin] += a.numBegin[bin]+b.numBegin[bin];
-     merged.numEnd[bin] += a.numEnd[bin]+b.numEnd[bin];
-     for (size_t dim=0; dim<3; dim++)
-       merged.bounds[bin][dim] = embree::merge(a.bounds[bin][dim],b.bounds[bin][dim]);
-   }
-   return merged;
- }
-
- /*! finds the best split by scanning binning information.
-  *  A right-to-left sweep accumulates the end counters and merged half areas for every
-  *  split position, a left-to-right sweep then evaluates
-  *  sah = lArea*lCount + rArea*rCount for all three dimensions at once, with the counts
-  *  rounded up to blocks of 2^blocks_shift primitives. The cheapest position of the
-  *  cheapest valid dimension is returned; if none is found the returned split is invalid. */
- SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
- {
- /* sweep from right to left and compute parallel prefix of merged bounds */
- vfloat4 rAreas[BINS];
- vuint4 rCounts[BINS];
- vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
- for (size_t i=BINS-1; i>0; i--)
- {
- count += numEnd[i];
- rCounts[i] = count;
- bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
- by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
- bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
- rAreas[i][3] = 0.0f;
- }
-
- /* sweep from left to right and compute SAH */
- vuint4 blocks_add = (1 << blocks_shift)-1; // added before the shift to round up to whole blocks
- vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
- count = 0; bx = empty; by = empty; bz = empty;
- for (size_t i=1; i<BINS; i++, ii+=1)
- {
- count += numBegin[i-1];
- bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
- by.extend(bounds[i-1][1]); float Ay = halfArea(by);
- bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
- const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
- const vfloat4 rArea = rAreas[i];
- const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
- const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
- const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
- // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
- const vbool4 mask = sah < vbestSAH; // lanes in which this position is the best so far
- vbestPos = select(mask,ii ,vbestPos);
- vbestSAH = select(mask,sah,vbestSAH);
- vbestlCount = select(mask,count,vbestlCount);
- vbestrCount = select(mask,rCounts[i],vbestrCount);
- }
-
- /* find best dimension */
- float bestSAH = inf;
- int bestDim = -1;
- int bestPos = 0;
- unsigned int bestlCount = 0;
- unsigned int bestrCount = 0;
- for (int dim=0; dim<3; dim++)
- {
- /* ignore zero sized dimensions */
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- /* test if this is a better dimension */
- if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
- bestDim = dim;
- bestPos = vbestPos[dim];
- bestSAH = vbestSAH[dim];
- bestlCount = vbestlCount[dim];
- bestrCount = vbestrCount[dim];
- }
- }
- assert(bestSAH >= 0.0f);
-
- /* return invalid split if no split found */
- if (bestDim == -1)
- return SpatialBinSplit<BINS>(inf,-1,0,mapping);
-
- /* return best found split */
- return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
- }
-
- private:
- BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
- vuint4 numBegin[BINS]; //!< number of primitives starting in bin
- vuint4 numEnd[BINS]; //!< number of primitives ending in bin
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h
deleted file mode 100644
index 911dcf950c..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h
+++ /dev/null
@@ -1,552 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-#include "heuristic_spatial.h"
-
-namespace embree
-{
- namespace isa
- {
-#if 0
-#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
-#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
-#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
-#else
-#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
-#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
-#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
-#endif
-
- struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
- {
- __forceinline PrimInfoExtRange() {
- }
-
- __forceinline PrimInfoExtRange(EmptyTy)
- : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}
-
- __forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)
- : CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}
-
- __forceinline float leafSAH() const {
- return expectedApproxHalfArea(geomBounds)*float(size());
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
- }
- };
-
- template<typename ObjectSplit, typename SpatialSplit>
- struct Split2
- {
- __forceinline Split2 () {}
-
- __forceinline Split2 (const Split2& other)
- {
- spatial = other.spatial;
- sah = other.sah;
- if (spatial) spatialSplit() = other.spatialSplit();
- else objectSplit() = other.objectSplit();
- }
-
- __forceinline Split2& operator= (const Split2& other)
- {
- spatial = other.spatial;
- sah = other.sah;
- if (spatial) spatialSplit() = other.spatialSplit();
- else objectSplit() = other.objectSplit();
- return *this;
- }
-
- __forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }
- __forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }
-
- __forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }
- __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }
-
- __forceinline Split2 (const ObjectSplit& objectSplit, float sah)
- : spatial(false), sah(sah)
- {
- new (data) ObjectSplit(objectSplit);
- }
-
- __forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
- : spatial(true), sah(sah)
- {
- new (data) SpatialSplit(spatialSplit);
- }
-
- __forceinline float splitSAH() const {
- return sah;
- }
-
- __forceinline bool valid() const {
- return sah < float(inf);
- }
-
- public:
- __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
- bool spatial;
- float sah;
- };
-
- /*! Performs standard object binning */
- template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>
- struct HeuristicArraySpatialSAH
- {
- typedef BinSplit<OBJECT_BINS> ObjectSplit;
- typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
-
- typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
- typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
-
- //typedef extended_range<size_t> Set;
- typedef Split2<ObjectSplit,SpatialSplit> Split;
-
-#if defined(__AVX512ER__) // KNL
- static const size_t PARALLEL_THRESHOLD = 3*1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#else
- static const size_t PARALLEL_THRESHOLD = 3*1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#endif
-
- static const size_t MOVE_STEP_SIZE = 64;
- static const size_t CREATE_SPLITS_STEP_SIZE = 64;
-
- __forceinline HeuristicArraySpatialSAH ()
- : prims0(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
- : prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
-
-
- /*! compute extended ranges */
- __noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
- {
- assert(set.ext_range_size() > 0);
- const float left_factor = (float)lweight / (lweight + rweight);
- const size_t ext_range_size = set.ext_range_size();
- const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
- const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
- lset.set_ext_range(lset.end() + left_ext_range_size);
- rset.set_ext_range(rset.end() + right_ext_range_size);
- }
-
- /*! move ranges */
- __noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t left_ext_range_size = lset.ext_range_size();
- const size_t right_size = rset.size();
-
- /* has the left child an extended range? */
- if (left_ext_range_size > 0)
- {
- /* left extended range smaller than right range ? */
- if (left_ext_range_size < right_size)
- {
- /* only move a small part of the beginning of the right range to the end */
- parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+right_size] = prims0[i];
- });
- }
- else
- {
- /* no overlap, move entire right range to new location, can be made fully parallel */
- parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+left_ext_range_size] = prims0[i];
- });
- }
- /* update right range */
- assert(rset.ext_end() + left_ext_range_size == set.ext_end());
- rset.move_right(left_ext_range_size);
- }
- }
-
- /*! finds the best split */
- const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SplitInfo oinfo;
- const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
- const float object_split_sah = object_split.splitSAH();
-
- if (unlikely(set.has_ext_range()))
- {
- const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);
-
- /* do only spatial splits if the child bounds overlap */
- if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
- safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
- {
- const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
- const float spatial_split_sah = spatial_split.splitSAH();
-
- /* valid spatial split, better SAH and number of splits do not exceed extended range */
- if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
- spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
- {
- return Split(spatial_split,spatial_split_sah);
- }
- }
- }
-
- return Split(object_split,object_split_sah);
- }
-
- /*! finds the best object split */
- __forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);
- else return parallel_object_find (set,logBlockSize,info);
- }
-
- /*! finds the best object split */
- __noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- ObjectBinner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set);
- binner.bin(prims0,set.begin(),set.end(),mapping);
- ObjectSplit s = binner.best(mapping,logBlockSize);
- binner.getSplitInfo(mapping, s, info);
- return s;
- }
-
- /*! finds the best split */
- __noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- ObjectBinner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set);
- const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
- binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
- [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
- [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
- ObjectSplit s = binner.best(mapping,logBlockSize);
- binner.getSplitInfo(mapping, s, info);
- return s;
- }
-
- /*! finds the best spatial split */
- __forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);
- else return parallel_spatial_find (set, logBlockSize);
- }
-
- /*! finds the best spatial split */
- __noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SpatialBinner binner(empty);
- const SpatialBinMapping<SPATIAL_BINS> mapping(set);
- binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
- /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
- return binner.best(mapping,logBlockSize); //,set.ext_size());
- }
-
- __noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SpatialBinner binner(empty);
- const SpatialBinMapping<SPATIAL_BINS> mapping(set);
- const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
- binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
- [&] (const range<size_t>& r) -> SpatialBinner {
- SpatialBinner binner(empty);
- binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
- return binner; },
- [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
- /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
- return binner.best(mapping,logBlockSize); //,set.ext_size());
- }
-
-
- /*! subdivides primitives based on a spatial split */
- __noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
- {
- assert(set.has_ext_range());
- const size_t max_ext_range_size = set.ext_range_size();
- const size_t ext_range_start = set.end();
-
- /* atomic counter for number of primref splits */
- std::atomic<size_t> ext_elements;
- ext_elements.store(0);
-
- const float fpos = split.mapping.pos(split.pos,split.dim);
-
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
-
- parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin();i<r.end();i++)
- {
- const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
- if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
-
- //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
- //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
- //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
- if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
- {
- assert(splits > 1);
-
- PrimRef left,right;
- const auto splitter = splitterFactory(prims0[i]);
- splitter(prims0[i],split.dim,fpos,left,right);
-
- // no empty splits
- if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;
-
- left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
-
- const size_t ID = ext_elements.fetch_add(1);
-
- /* break if the number of subdivided elements are greater than the maximum allowed size */
- if (unlikely(ID >= max_ext_range_size))
- break;
-
- /* only write within the correct bounds */
- assert(ID < max_ext_range_size);
- prims0[i] = left;
- prims0[ext_range_start+ID] = right;
- }
- }
- });
-
- const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
- assert(set.end()+numExtElements<=set.ext_end());
- set._end += numExtElements;
- }
-
- /*! array partitioning */
- void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- PrimInfoExtRange set = set_i;
-
- /* valid split */
- if (unlikely(!split.valid())) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- std::pair<size_t,size_t> ext_weights(0,0);
-
- if (unlikely(split.spatial))
- {
- create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);
-
- /* spatial split */
- if (likely(set.size() < PARALLEL_THRESHOLD))
- ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
- else
- ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
- }
- else
- {
- /* object split */
- if (likely(set.size() < PARALLEL_THRESHOLD))
- ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
- else
- ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
- }
-
- /* if we have an extended range, set extended child ranges and move right split range */
- if (unlikely(set.has_ext_range()))
- {
- setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- /*! array partitioning */
- std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo local_left(empty);
- PrimInfo local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const typename ObjectBinner::vint vSplitPos(splitPos);
- const typename ObjectBinner::vbool vSplitMask(splitDimMask);
- size_t center = serial_partitioning(prims0,
- begin,end,local_left,local_right,
- [&] (const PrimRef& ref) {
- return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
- },
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
- const size_t left_weight = local_left.end;
- const size_t right_weight = local_right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,local_left);
- new (&rset) PrimInfoExtRange(center,end,end,local_right);
-
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo local_left(empty);
- PrimInfo local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- /* init spatial mapping */
- const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
-
- size_t center = serial_partitioning(prims0,
- begin,end,local_left,local_right,
- [&] (const PrimRef& ref) {
- const Vec3fa c = ref.bounds().center();
- return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
- },
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
-
- const size_t left_weight = local_left.end;
- const size_t right_weight = local_right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,local_left);
- new (&rset) PrimInfoExtRange(center,end,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
-
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo left(empty);
- PrimInfo right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const typename ObjectBinner::vint vSplitPos(splitPos);
- const typename ObjectBinner::vbool vSplitMask(splitDimMask);
- auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-
- const size_t center = parallel_partitioning(
- prims0,begin,end,EmptyTy(),left,right,isLeft,
- [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
- [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- const size_t left_weight = left.end;
- const size_t right_weight = right.end;
-
- left.begin = begin; left.end = center;
- right.begin = center; right.end = end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- assert(area(left.geomBounds) >= 0.0f);
- assert(area(right.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo left(empty);
- PrimInfo right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- /* init spatial mapping */
- const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
-
- auto isLeft = [&] (const PrimRef &ref) {
- const Vec3fa c = ref.bounds().center();
- return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };
-
- const size_t center = parallel_partitioning(
- prims0,begin,end,EmptyTy(),left,right,isLeft,
- [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
- [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- const size_t left_weight = left.end;
- const size_t right_weight = right.end;
-
- left.begin = begin; left.end = center;
- right.begin = center; right.end = end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- assert(area(left.geomBounds) >= 0.0f);
- assert(area(right.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
- void deterministic_order(const PrimInfoExtRange& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims0[set.begin()],&prims0[set.end()]);
- }
-
- void splitFallback(const PrimInfoExtRange& set,
- PrimInfoExtRange& lset,
- PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfo left(empty);
- for (size_t i=begin; i<center; i++) {
- left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- }
- const size_t lweight = left.end;
-
- PrimInfo right(empty);
- for (size_t i=center; i<end; i++) {
- right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- }
- const size_t rweight = right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- /* if we have an extended range */
- if (set.has_ext_range()) {
- setExtentedRanges(set,lset,rset,lweight,rweight);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- private:
- PrimRef* const prims0;
- const PrimitiveSplitterFactory& splitterFactory;
- const CentGeomBBox3fa& root_info;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h
deleted file mode 100644
index ede0d04c78..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "priminfo.h"
-#include "../../common/algorithms/parallel_reduce.h"
-#include "../../common/algorithms/parallel_partition.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- struct HeuristicStrandSplit
- {
- typedef range<size_t> Set;
-
- static const size_t PARALLEL_THRESHOLD = 10000;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
-
- /*! stores all information to perform some split */
- struct Split
- {
- /*! construct an invalid split by default */
- __forceinline Split()
- : sah(inf), axis0(zero), axis1(zero) {}
-
- /*! constructs specified split */
- __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
- : sah(sah), axis0(axis0), axis1(axis1) {}
-
- /*! calculates standard surface area heuristic for the split */
- __forceinline float splitSAH() const { return sah; }
-
- /*! test if this split is valid */
- __forceinline bool valid() const { return sah != float(inf); }
-
- public:
- float sah; //!< SAH cost of the split
- Vec3fa axis0, axis1; //!< axis the two strands are aligned into
- };
-
- __forceinline HeuristicStrandSplit () // FIXME: required?
- : scene(nullptr), prims(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
- : scene(scene), prims(prims) {}
-
- __forceinline const Vec3fa direction(const PrimRef& prim) {
- return scene->get(prim.geomID())->computeDirection(prim.primID());
- }
-
- __forceinline const BBox3fa bounds(const PrimRef& prim) {
- return scene->get(prim.geomID())->vbounds(prim.primID());
- }
-
- __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
- return scene->get(prim.geomID())->vbounds(space,prim.primID());
- }
-
- /*! finds the best split */
- const Split find(const range<size_t>& set, size_t logBlockSize)
- {
- Vec3fa axis0(0,0,1);
- uint64_t bestGeomPrimID = -1;
-
- /* curve with minimum ID determines first axis */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const uint64_t geomprimID = prims[i].ID64();
- if (geomprimID >= bestGeomPrimID) continue;
- const Vec3fa axis = direction(prims[i]);
- if (sqr_length(axis) > 1E-18f) {
- axis0 = normalize(axis);
- bestGeomPrimID = geomprimID;
- }
- }
-
- /* find 2nd axis that is most misaligned with first axis and has minimum ID */
- float bestCos = 1.0f;
- Vec3fa axis1 = axis0;
- bestGeomPrimID = -1;
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const uint64_t geomprimID = prims[i].ID64();
- Vec3fa axisi = direction(prims[i]);
- float leni = length(axisi);
- if (leni == 0.0f) continue;
- axisi /= leni;
- float cos = abs(dot(axisi,axis0));
- if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
- bestCos = cos; axis1 = axisi;
- bestGeomPrimID = geomprimID;
- }
- }
-
- /* partition the two strands */
- size_t lnum = 0, rnum = 0;
- BBox3fa lbounds = empty, rbounds = empty;
- const LinearSpace3fa space0 = frame(axis0).transposed();
- const LinearSpace3fa space1 = frame(axis1).transposed();
-
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- PrimRef& prim = prims[i];
- const Vec3fa axisi = normalize(direction(prim));
- const float cos0 = abs(dot(axisi,axis0));
- const float cos1 = abs(dot(axisi,axis1));
-
- if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
- else { rnum++; rbounds.extend(bounds(space1,prim)); }
- }
-
- /*! return an invalid split if we do not partition */
- if (lnum == 0 || rnum == 0)
- return Split(inf,axis0,axis1);
-
- /*! calculate sah for the split */
- const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
- const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
- const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
- return Split(sah,axis0,axis1);
- }
-
- /*! array partitioning */
- void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- if (!split.valid()) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- CentGeomBBox3fa local_left(empty);
- CentGeomBBox3fa local_right(empty);
-
- auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
- const Vec3fa axisi = normalize(direction(prim));
- const float cos0 = abs(dot(axisi,split.axis0));
- const float cos1 = abs(dot(axisi,split.axis1));
- return cos0 > cos1;
- };
-
- auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
- pinfo.extend(bounds(ref));
- };
-
- size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
-
- new (&lset) PrimInfoRange(begin,center,local_left);
- new (&rset) PrimInfoRange(center,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- }
-
- void deterministic_order(const Set& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims[set.begin()],&prims[set.end()]);
- }
-
- void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- CentGeomBBox3fa left(empty);
- for (size_t i=begin; i<center; i++)
- left.extend(bounds(prims[i]));
- new (&lset) PrimInfoRange(begin,center,left);
-
- CentGeomBBox3fa right(empty);
- for (size_t i=center; i<end; i++)
- right.extend(bounds(prims[i]));
- new (&rset) PrimInfoRange(center,end,right);
- }
-
- private:
- Scene* const scene;
- PrimRef* const prims;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h
deleted file mode 100644
index c999941a11..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/primref_mb.h"
-#include "../../common/algorithms/parallel_filter.h"
-
-#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS>
- struct HeuristicMBlurTemporalSplit
- {
- typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
- typedef mvector<PrimRefMB>* PrimRefVector;
- typedef typename PrimRefMB::BBox BBox;
-
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef)
- : device(device), recalculatePrimRef(recalculatePrimRef) {}
-
- struct TemporalBinInfo
- {
- __forceinline TemporalBinInfo () {
- }
-
- __forceinline TemporalBinInfo (EmptyTy)
- {
- for (size_t i=0; i<BINS-1; i++)
- {
- count0[i] = count1[i] = 0;
- bounds0[i] = bounds1[i] = empty;
- }
- }
-
- void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
- {
- for (int b=0; b<BINS-1; b++)
- {
- const float t = float(b+1)/float(BINS);
- const float ct = lerp(time_range.lower,time_range.upper,t);
- const float center_time = set.align_time(ct);
- if (center_time <= time_range.lower) continue;
- if (center_time >= time_range.upper) continue;
- const BBox1f dt0(time_range.lower,center_time);
- const BBox1f dt1(center_time,time_range.upper);
-
- /* find linear bounds for both time segments */
- for (size_t i=begin; i<end; i++)
- {
- if (prims[i].time_range_overlap(dt0))
- {
- const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0);
-#if MBLUR_BIN_LBBOX
- bounds0[b].extend(bn0);
-#else
- bounds0[b].extend(bn0.interpolate(0.5f));
-#endif
- count0[b] += prims[i].timeSegmentRange(dt0).size();
- }
-
- if (prims[i].time_range_overlap(dt1))
- {
- const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1);
-#if MBLUR_BIN_LBBOX
- bounds1[b].extend(bn1);
-#else
- bounds1[b].extend(bn1.interpolate(0.5f));
-#endif
- count1[b] += prims[i].timeSegmentRange(dt1).size();
- }
- }
- }
- }
-
- __forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
- {
- if (likely(end-begin < parallelThreshold)) {
- bin(prims,begin,end,time_range,set,recalculatePrimRef);
- }
- else
- {
- auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {
- TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;
- };
- *this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2);
- }
- }
-
- /*! merges in other binning information */
- __forceinline void merge (const TemporalBinInfo& other)
- {
- for (size_t i=0; i<BINS-1; i++)
- {
- count0[i] += other.count0[i];
- count1[i] += other.count1[i];
- bounds0[i].extend(other.bounds0[i]);
- bounds1[i].extend(other.bounds1[i]);
- }
- }
-
- static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) {
- TemporalBinInfo r = a; r.merge(b); return r;
- }
-
- Split best(int logBlockSize, BBox1f time_range, const SetMB& set)
- {
- float bestSAH = inf;
- float bestPos = 0.0f;
- for (int b=0; b<BINS-1; b++)
- {
- float t = float(b+1)/float(BINS);
- float ct = lerp(time_range.lower,time_range.upper,t);
- const float center_time = set.align_time(ct);
- if (center_time <= time_range.lower) continue;
- if (center_time >= time_range.upper) continue;
- const BBox1f dt0(time_range.lower,center_time);
- const BBox1f dt1(center_time,time_range.upper);
-
- /* calculate sah */
- const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
- const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
- float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size();
- float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size();
- if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time
- if (unlikely(rCount == 0)) sah1 = 0.0f;
- const float sah = sah0+sah1;
- if (sah < bestSAH) {
- bestSAH = sah;
- bestPos = center_time;
- }
- }
- return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos);
- }
-
- public:
- size_t count0[BINS-1];
- size_t count1[BINS-1];
- BBox bounds0[BINS-1];
- BBox bounds1[BINS-1];
- };
-
- /*! finds the best split */
- const Split find(const SetMB& set, const size_t logBlockSize)
- {
- assert(set.size() > 0);
- TemporalBinInfo binner(empty);
- binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef);
- Split tsplit = binner.best((int)logBlockSize,set.time_range,set);
- if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split
- return tsplit;
- }
-
- __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- assert(tsplit.sah != float(inf));
- assert(tsplit.fpos > set.time_range.lower);
- assert(tsplit.fpos < set.time_range.upper);
-
- float center_time = tsplit.fpos;
- const BBox1f time_range0(set.time_range.lower,center_time);
- const BBox1f time_range1(center_time,set.time_range.upper);
- mvector<PrimRefMB>& prims = *set.prims;
-
- /* calculate primrefs for first time range */
- std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size()));
- PrimRefVector lprims = new_vector.get();
-
- auto reduction_func0 = [&] (const range<size_t>& r) {
- PrimInfoMB pinfo = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (likely(prims[i].time_range_overlap(time_range0)))
- {
- const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0);
- (*lprims)[i-set.begin()] = prim;
- pinfo.add_primref(prim);
- }
- else
- {
- (*lprims)[i-set.begin()] = prims[i];
- }
- }
- return pinfo;
- };
- PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2);
-
- /* primrefs for first time range are in lprims[0 .. set.size()) */
- /* some primitives may need to be filtered out */
- if (linfo.size() != set.size())
- linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024),
- [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); });
-
- lset = SetMB(linfo,lprims,time_range0);
-
- /* calculate primrefs for second time range */
- auto reduction_func1 = [&] (const range<size_t>& r) {
- PrimInfoMB pinfo = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (likely(prims[i].time_range_overlap(time_range1)))
- {
- const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1);
- prims[i] = prim;
- pinfo.add_primref(prim);
- }
- }
- return pinfo;
- };
- PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2);
- rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size());
-
- /* primrefs for second time range are in prims[set.begin() .. set.end()) */
- /* some primitives may need to be filtered out */
- if (rinfo.size() != set.size())
- rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024),
- [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); });
-
- rset = SetMB(rinfo,&prims,time_range1);
-
- return new_vector;
- }
-
- private:
- MemoryMonitorInterface* device; // device to report memory usage to
- const RecalculatePrimRef recalculatePrimRef;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/priminfo.h b/thirdparty/embree-aarch64/kernels/builders/priminfo.h
deleted file mode 100644
index 06c1388742..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/priminfo.h
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-
-namespace embree
-{
- // FIXME: maybe there's a better place for this util fct
- __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2)
- {
- const Vec3fa e0 = v1-v0;
- const Vec3fa e1 = v2-v0;
- const Vec3fa d = cross(e0,e1);
- return fabs(d.x) + fabs(d.y) + fabs(d.z);
- }
-
- //namespace isa
- //{
- template<typename BBox>
- class CentGeom
- {
- public:
- __forceinline CentGeom () {}
-
- __forceinline CentGeom (EmptyTy)
- : geomBounds(empty), centBounds(empty) {}
-
- __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds)
- : geomBounds(geomBounds), centBounds(centBounds) {}
-
- template<typename PrimRef>
- __forceinline void extend_primref(const PrimRef& prim)
- {
- BBox bounds; Vec3fa center;
- prim.binBoundsAndCenter(bounds,center);
- geomBounds.extend(bounds);
- centBounds.extend(center);
- }
-
- template<typename PrimRef>
- __forceinline void extend_center2(const PrimRef& prim)
- {
- BBox3fa bounds = prim.bounds();
- geomBounds.extend(bounds);
- centBounds.extend(bounds.center2());
- }
-
- __forceinline void extend(const BBox& geomBounds_) {
- geomBounds.extend(geomBounds_);
- centBounds.extend(center2(geomBounds_));
- }
-
- __forceinline void merge(const CentGeom& other)
- {
- geomBounds.extend(other.geomBounds);
- centBounds.extend(other.centBounds);
- }
-
- static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) {
- CentGeom r = a; r.merge(b); return r;
- }
-
- public:
- BBox geomBounds; //!< geometry bounds of primitives
- BBox3fa centBounds; //!< centroid bounds of primitives
- };
-
- typedef CentGeom<BBox3fa> CentGeomBBox3fa;
-
- /*! stores bounding information for a set of primitives */
- template<typename BBox>
- class PrimInfoT : public CentGeom<BBox>
- {
- public:
- using CentGeom<BBox>::geomBounds;
- using CentGeom<BBox>::centBounds;
-
- __forceinline PrimInfoT () {}
-
- __forceinline PrimInfoT (EmptyTy)
- : CentGeom<BBox>(empty), begin(0), end(0) {}
-
- __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
- : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}
-
- template<typename PrimRef>
- __forceinline void add_primref(const PrimRef& prim)
- {
- CentGeom<BBox>::extend_primref(prim);
- end++;
- }
-
- template<typename PrimRef>
- __forceinline void add_center2(const PrimRef& prim) {
- CentGeom<BBox>::extend_center2(prim);
- end++;
- }
-
- template<typename PrimRef>
- __forceinline void add_center2(const PrimRef& prim, const size_t i) {
- CentGeom<BBox>::extend_center2(prim);
- end+=i;
- }
-
- /*__forceinline void add(const BBox& geomBounds_) {
- CentGeom<BBox>::extend(geomBounds_);
- end++;
- }
-
- __forceinline void add(const BBox& geomBounds_, const size_t i) {
- CentGeom<BBox>::extend(geomBounds_);
- end+=i;
- }*/
-
- __forceinline void merge(const PrimInfoT& other)
- {
- CentGeom<BBox>::merge(other);
- begin += other.begin;
- end += other.end;
- }
-
- static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) {
- PrimInfoT r = a; r.merge(b); return r;
- }
-
- /*! returns the number of primitives */
- __forceinline size_t size() const {
- return end-begin;
- }
-
- __forceinline float halfArea() {
- return expectedApproxHalfArea(geomBounds);
- }
-
- __forceinline float leafSAH() const {
- return expectedApproxHalfArea(geomBounds)*float(size());
- //return halfArea(geomBounds)*blocks(num);
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
- //return halfArea(geomBounds)*float((num+3) >> 2);
- //return halfArea(geomBounds)*blocks(num);
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) {
- return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}";
- }
-
- public:
- size_t begin,end; //!< number of primitives
- };
-
- typedef PrimInfoT<BBox3fa> PrimInfo;
- //typedef PrimInfoT<LBBox3fa> PrimInfoMB;
-
- /*! stores bounding information for a set of primitives */
- template<typename BBox>
- class PrimInfoMBT : public CentGeom<BBox>
- {
- public:
- using CentGeom<BBox>::geomBounds;
- using CentGeom<BBox>::centBounds;
-
- __forceinline PrimInfoMBT () {
- }
-
- __forceinline PrimInfoMBT (EmptyTy)
- : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
-
- __forceinline PrimInfoMBT (size_t begin, size_t end)
- : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
-
- template<typename PrimRef>
- __forceinline void add_primref(const PrimRef& prim)
- {
- CentGeom<BBox>::extend_primref(prim);
- time_range.extend(prim.time_range);
- object_range._end++;
- num_time_segments += prim.size();
- if (max_num_time_segments < prim.totalTimeSegments()) {
- max_num_time_segments = prim.totalTimeSegments();
- max_time_range = prim.time_range;
- }
- }
-
- __forceinline void merge(const PrimInfoMBT& other)
- {
- CentGeom<BBox>::merge(other);
- time_range.extend(other.time_range);
- object_range._begin += other.object_range.begin();
- object_range._end += other.object_range.end();
- num_time_segments += other.num_time_segments;
- if (max_num_time_segments < other.max_num_time_segments) {
- max_num_time_segments = other.max_num_time_segments;
- max_time_range = other.max_time_range;
- }
- }
-
- static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
- PrimInfoMBT r = a; r.merge(b); return r;
- }
-
- __forceinline size_t begin() const {
- return object_range.begin();
- }
-
- __forceinline size_t end() const {
- return object_range.end();
- }
-
- /*! returns the number of primitives */
- __forceinline size_t size() const {
- return object_range.size();
- }
-
- __forceinline float halfArea() const {
- return time_range.size()*expectedApproxHalfArea(geomBounds);
- }
-
- __forceinline float leafSAH() const {
- return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
- }
-
- __forceinline float align_time(float ct) const
- {
- //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
- float t0 = (ct-max_time_range.lower)/max_time_range.size();
- float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
- return t1*max_time_range.size()+max_time_range.lower;
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
- {
- return cout << "PrimInfo { " <<
- "object_range = " << pinfo.object_range <<
- ", time_range = " << pinfo.time_range <<
- ", time_segments = " << pinfo.num_time_segments <<
- ", geomBounds = " << pinfo.geomBounds <<
- ", centBounds = " << pinfo.centBounds <<
- "}";
- }
-
- public:
- range<size_t> object_range; //!< primitive range
- size_t num_time_segments; //!< total number of time segments of all added primrefs
- size_t max_num_time_segments; //!< maximum number of time segments of a primitive
- BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
- BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
- };
-
- typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
-
- struct SetMB : public PrimInfoMB
- {
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- typedef mvector<PrimRefMB>* PrimRefVector;
-
- __forceinline SetMB() {}
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
- : PrimInfoMB(pinfo_i), prims(prims) {}
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
- : PrimInfoMB(pinfo_i), prims(prims)
- {
- object_range = object_range_in;
- time_range = intersect(time_range,time_range_in);
- }
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
- : PrimInfoMB(pinfo_i), prims(prims)
- {
- time_range = intersect(time_range,time_range_in);
- }
-
- void deterministic_order() const
- {
- /* required as parallel partition destroys original primitive order */
- PrimRefMB* prim = prims->data();
- std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
- }
-
- template<typename RecalculatePrimRef>
- __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
- {
- auto reduce = [&](const range<size_t>& r) -> LBBox3fa
- {
- LBBox3fa cbounds(empty);
- for (size_t j = r.begin(); j < r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
- cbounds.extend(bn);
- };
- return cbounds;
- };
-
- return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
- reduce,
- [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
- }
-
- template<typename RecalculatePrimRef>
- __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
- {
- auto reduce = [&](const range<size_t>& r) -> LBBox3fa
- {
- LBBox3fa cbounds(empty);
- for (size_t j = r.begin(); j < r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
- cbounds.extend(bn);
- };
- return cbounds;
- };
-
- return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
- reduce,
- [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
- }
-
- template<typename RecalculatePrimRef>
- const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
- {
- auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
- pinfo.add_primref(ref1);
- };
- return pinfo;
- };
-
- const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
- PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
-
- return SetMB(pinfo,prims,object_range,time_range);
- }
-
- public:
- PrimRefVector prims;
- };
-//}
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp b/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp
deleted file mode 100644
index e23de3df28..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primrefgen.h"
-#include "primrefgen_presplit.h"
-
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-namespace embree
-{
- namespace isa
- {
- PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelPrefixSumState<PrimInfo> pstate;
-
- /* first try */
- progressMonitor(0);
- PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,base.size(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,mblur);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
- {
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,true);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
- {
- ParallelForForPrefixSumState<PrimInfoMB> pstate;
- Scene::Iterator2 iter(scene,types,true);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
- return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID);
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
- return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
- }
-
- /* the BVH starts with that time range, even though primitives might have smaller/larger time range */
- pinfo.time_range = t0t1;
- return pinfo;
- }
-
- template<typename Mesh>
- size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
- {
- size_t numPrimitives = morton.size();
-
- /* compute scene bounds */
- std::pair<size_t,BBox3fa> cb_empty(0,empty);
- auto cb = parallel_reduce
- ( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa>
- {
- size_t num = 0;
- BBox3fa bounds = empty;
-
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa prim_bounds = empty;
- if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue;
- bounds.extend(center2(prim_bounds));
- num++;
- }
- return std::make_pair(num,bounds);
- }, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) {
- return std::make_pair(a.first + b.first,merge(a.second,b.second));
- });
-
-
- size_t numPrimitivesGen = cb.first;
- const BBox3fa centBounds = cb.second;
-
- /* compute morton codes */
- if (likely(numPrimitivesGen == numPrimitives))
- {
- /* fast path if all primitives were valid */
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void {
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
- for (size_t j=r.begin(); j<r.end(); j++)
- generator(mesh->bounds(j),unsigned(j));
- });
- }
- else
- {
- /* slow path, fallback in case some primitives were invalid */
- ParallelPrefixSumState<size_t> pstate;
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
- size_t num = 0;
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (unlikely(!mesh->buildBounds(j,&bounds))) continue;
- generator(bounds,unsigned(j));
- num++;
- }
- return num;
- }, std::plus<size_t>());
-
- parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
- size_t num = 0;
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- generator(bounds,unsigned(j));
- num++;
- }
- return num;
- }, std::plus<size_t>());
- }
- return numPrimitivesGen;
- }
-
- // ====================================================================================================
- // ====================================================================================================
- // ====================================================================================================
-
- // template for grid meshes
-
-#if 0
- template<>
- PrimInfo createPrimRefArray<GridMesh,false>(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- PING;
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator<GridMesh,false> iter(scene);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k) -> PrimInfo
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,mesh->geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, const PrimInfo& base) -> PrimInfo
- {
- k = base.size();
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,mesh->geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-#endif
-
- // ====================================================================================================
- // ====================================================================================================
- // ====================================================================================================
-
- IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen.h
deleted file mode 100644
index 9919c945c3..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-#include "priminfo.h"
-#include "bvh_builder_morton.h"
-
-namespace embree
-{
- namespace isa
- {
- PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
-
- PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
-
- PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
-
- PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
-
- template<typename Mesh>
- size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h
deleted file mode 100644
index 8bdb38b955..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../builders/primrefgen.h"
-#include "../builders/heuristic_spatial.h"
-#include "../builders/splitter.h"
-
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-#define DBG_PRESPLIT(x)
-#define CHECK_PRESPLIT(x)
-
-#define GRID_SIZE 1024
-#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
-#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
-#define PRIORITY_CUTOFF_THRESHOLD 1.0f
-#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
-
-namespace embree
-{
- namespace isa
- {
-
- struct PresplitItem
- {
- union {
- float priority;
- unsigned int data;
- };
- unsigned int index;
-
- __forceinline operator unsigned() const
- {
- return reinterpret_cast<const unsigned&>(priority);
- }
- __forceinline bool operator < (const PresplitItem& item) const
- {
- return (priority < item.priority);
- }
-
- template<typename Mesh>
- __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
- {
- const unsigned int geomID = ref.geomID();
- const unsigned int primID = ref.primID();
- const float area_aabb = area(ref.bounds());
- const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
- const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
- assert(area_prim <= area_aabb);
- //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
- const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
- assert(priority >= 0.0f && priority < FLT_LARGE);
- return priority;
- }
-
-
- };
-
- inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
- return cout << "index " << item.index << " priority " << item.priority;
- };
-
- template<typename SplitterFactory>
- void splitPrimitive(SplitterFactory &Splitter,
- const PrimRef &prim,
- const unsigned int geomID,
- const unsigned int primID,
- const unsigned int split_level,
- const Vec3fa &grid_base,
- const float grid_scale,
- const float grid_extend,
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
- unsigned int& numSubPrims)
- {
- assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- if (split_level == 0)
- {
- assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
- subPrims[numSubPrims++] = prim;
- }
- else
- {
- const Vec3fa lower = prim.lower;
- const Vec3fa upper = prim.upper;
- const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
- const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
- Vec3ia ilower(floor(glower));
- Vec3ia iupper(floor(gupper));
-
- /* this ignores dimensions that are empty */
- iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
-
- /* compute a morton code for the lower and upper grid coordinates. */
- const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
- const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
-
- /* if all bits are equal then we cannot split */
- if(unlikely(lower_code == upper_code))
- {
- assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
- subPrims[numSubPrims++] = prim;
- return;
- }
-
- /* compute octree level and dimension to perform the split in */
- const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
- const unsigned int level = diff / 3;
- const unsigned int dim = diff % 3;
-
- /* now we compute the grid position of the split */
- const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
-
- /* compute world space position of split */
- const float inv_grid_size = 1.0f / GRID_SIZE;
- const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
-
- assert(prim.lower[dim] <= fsplit &&
- prim.upper[dim] >= fsplit);
-
- /* split primitive */
- const auto splitter = Splitter(prim);
- BBox3fa left,right;
- splitter(prim.bounds(),dim,fsplit,left,right);
- assert(!left.empty());
- assert(!right.empty());
-
-
- splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- }
- }
-
-
- template<typename Mesh, typename SplitterFactory>
- PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelPrefixSumState<PrimInfo> pstate;
-
- /* first try */
- progressMonitor(0);
- PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != numPrimRefs)
- {
- progressMonitor(0);
- pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,base.size(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
- {
- const Vec3fa lower = ref.lower;
- const Vec3fa upper = ref.upper;
- const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
- const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
- Vec3ia ilower(floor(glower));
- Vec3ia iupper(floor(gupper));
-
- /* this ignores dimensions that are empty */
- iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
-
- /* compute a morton code for the lower and upper grid coordinates. */
- const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
- const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
- return Vec2i(lower_code,upper_code);
- }
-
- template<typename Mesh, typename SplitterFactory>
- PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- static const size_t MIN_STEP_SIZE = 128;
-
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,mblur);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != numPrimRefs)
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
-
- /* use correct number of primitives */
- size_t numPrimitives = pinfo.size();
- const size_t alloc_numPrimitives = prims.size();
- const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
-
- /* set up primitive splitter */
- SplitterFactory Splitter(scene);
-
-
- DBG_PRESPLIT(
- const size_t org_numPrimitives = pinfo.size();
- PRINT(numPrimitives);
- PRINT(alloc_numPrimitives);
- PRINT(numSplitPrimitivesBudget);
- );
-
- /* allocate double buffer presplit items */
- const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
- PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
- PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
-
- /* compute grid */
- const Vec3fa grid_base = pinfo.geomBounds.lower;
- const Vec3fa grid_diag = pinfo.geomBounds.size();
- const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
- const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
-
- /* init presplit items and get total sum */
- const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
- float sum = 0.0f;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- presplitItem[i].index = (unsigned int)i;
- const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
- /* if all bits are equal then we cannot split */
- presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
- /* FIXME: sum undeterministic */
- sum += presplitItem[i].priority;
- }
- return sum;
- },[](const float& a, const float& b) -> float { return a+b; });
-
- /* compute number of splits per primitive */
- const float inv_psum = 1.0f / psum;
- parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (presplitItem[i].priority > 0.0f)
- {
- const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
- if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
- {
- presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
- //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- }
- else
- presplitItem[i].priority = 0.0f;
- }
- }
- });
-
- auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
- size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
-
- /* anything to split ? */
- if (center < numPrimitives)
- {
- const size_t numPrimitivesToSplit = numPrimitives - center;
- assert(presplitItem[center].priority >= 1.0f);
-
- /* sort presplit items in ascending order */
- radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
-
- CHECK_PRESPLIT(
- parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
- for (size_t i=r.begin(); i<r.end(); i++)
- assert(presplitItem[i-1].priority <= presplitItem[i].priority);
- });
- );
-
- unsigned int *const primOffset0 = (unsigned int*)tmp_presplitItem;
- unsigned int *const primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
-
- /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
- const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
- size_t sum = 0;
- for (size_t i=t.begin(); i<t.end(); i++)
- {
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
- assert(presplitItem[i].priority >= 1.0f);
- const unsigned int primrefID = presplitItem[i].index;
- const float prio = presplitItem[i].priority;
- const unsigned int geomID = prims[primrefID].geomID();
- const unsigned int primID = prims[primrefID].primID();
- const unsigned int split_levels = (unsigned int)prio;
- unsigned int numSubPrims = 0;
- splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- assert(numSubPrims);
- numSubPrims--; // can reuse slot
- sum+=numSubPrims;
- presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
- primOffset0[i-center] = numSubPrims;
- }
- return sum;
- },[](const size_t& a, const size_t& b) -> size_t { return a+b; });
-
- /* if we are over budget, need to shrink the range */
- if (totalNumSubPrims > numSplitPrimitivesBudget)
- {
- size_t new_center = numPrimitives-1;
- size_t sum = 0;
- for (;new_center>=center;new_center--)
- {
- const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
- if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
- sum += numSubPrims;
- }
- new_center++;
- center = new_center;
- }
-
- /* parallel prefix sum to compute offsets for storing sub-primitives */
- const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
-
- /* iterate over range, and split primitives into sub primitives and append them to prims array */
- parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
- for (size_t j=rn.begin(); j<rn.end(); j++)
- {
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
- const unsigned int primrefID = presplitItem[j].index;
- const unsigned int geomID = prims[primrefID].geomID();
- const unsigned int primID = prims[primrefID].primID();
- const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
-
- assert(split_levels);
- assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- unsigned int numSubPrims = 0;
- splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- const size_t newID = numPrimitives + primOffset1[j-center];
- assert(newID+numSubPrims <= alloc_numPrimitives);
- prims[primrefID] = subPrims[0];
- for (size_t i=1;i<numSubPrims;i++)
- prims[newID+i-1] = subPrims[i];
- }
- });
-
- numPrimitives += offset;
- DBG_PRESPLIT(
- PRINT(pinfo.size());
- PRINT(numPrimitives);
- PRINT((float)numPrimitives/org_numPrimitives));
- }
-
- /* recompute centroid bounding boxes */
- pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
- PrimInfo p(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- p.add_center2(prims[j]);
- return p;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- assert(pinfo.size() == numPrimitives);
-
- /* free double buffer presplit items */
- alignedFree(tmp_presplitItem);
- alignedFree(presplitItem);
- return pinfo;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/splitter.h b/thirdparty/embree-aarch64/kernels/builders/splitter.h
deleted file mode 100644
index dbd6cf07c7..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/splitter.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/primref.h"
-
-namespace embree
-{
- namespace isa
- {
- template<size_t N> // N is the number of edges; the vertex array v has N+1 entries
- __forceinline void splitPolygon(const BBox3fa& bounds, // box that both results are intersected with at the end
- const size_t dim, // axis of the split (used as index into each Vec3fa)
- const float pos, // split position on axis dim
- const Vec3fa (&v)[N+1], // polygon vertices; edge i runs from v[i] to v[i+1]
- const Vec3fa (&inv_length)[N], // per-edge factors; the splitters in this file pass Vec3fa(1.0f)/(v[i+1]-v[i])
- BBox3fa& left_o, // out: box of everything at or below pos, intersected with bounds
- BBox3fa& right_o) // out: box of everything at or above pos, intersected with bounds
- {
- BBox3fa left = empty, right = empty;
- /* clip polygon to left and right box by processing all edges */
- for (size_t i=0; i<N; i++)
- {
- const Vec3fa &v0 = v[i];
- const Vec3fa &v1 = v[i+1];
- const float v0d = v0[dim];
- const float v1d = v1[dim];
-
- if (v0d <= pos) left. extend(v0); // this point is on left side
- if (v0d >= pos) right.extend(v0); // this point is on right side (a point exactly at pos is added to both boxes)
-
- if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge strictly crosses the splitting location
- {
- assert((v1d-v0d) != 0.0f);
- const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0); // crossing point v0 + t*(v1-v0), assuming madd(a,b,c) computes a*b+c
- left.extend(c);
- right.extend(c);
- }
- }
-
- /* clip against current bounds */
- left_o = intersect(left,bounds);
- right_o = intersect(right,bounds);
- }
-
- template<size_t N> // N is the number of edges; the vertex array v has N+1 entries
- __forceinline void splitPolygon(const PrimRef& prim, // source primitive; supplies bounds(), geomID() and primID() for both results
- const size_t dim, // axis of the split (used as index into each Vec3fa)
- const float pos, // split position on axis dim
- const Vec3fa (&v)[N+1], // polygon vertices; edge i runs from v[i] to v[i+1]
- PrimRef& left_o, // out: constructed in place from the left box
- PrimRef& right_o) // out: constructed in place from the right box
- {
- BBox3fa left = empty, right = empty;
- for (size_t i=0; i<N; i++) // process every edge of the polygon
- {
- const Vec3fa &v0 = v[i];
- const Vec3fa &v1 = v[i+1];
- const float v0d = v0[dim];
- const float v1d = v1[dim];
-
- if (v0d <= pos) left. extend(v0); // this point is on left side
- if (v0d >= pos) right.extend(v0); // this point is on right side (a point exactly at pos is added to both boxes)
-
- if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge strictly crosses the splitting location
- {
- assert((v1d-v0d) != 0.0f);
- const float inv_length = 1.0f/(v1d-v0d); // computed per crossing edge here instead of being passed in precomputed
- const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0); // crossing point v0 + t*(v1-v0), assuming madd(a,b,c) computes a*b+c
- left.extend(c);
- right.extend(c);
- }
- }
-
- /* clip against the bounds of the input primitive and keep its geomID/primID */
- new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID()); // placement new into the caller's storage
- new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
- }
-
- struct TriangleSplitter // caches the vertices of one triangle and splits it via splitPolygon<3>
- {
- __forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim) // looks up the triangle referenced by prim in scene
- {
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; // clears the top RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS bits of the geomID
- const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask );
- TriangleMesh::Triangle tri = mesh->triangle(prim.primID());
- v[0] = mesh->vertex(tri.v[0]);
- v[1] = mesh->vertex(tri.v[1]);
- v[2] = mesh->vertex(tri.v[2]);
- v[3] = mesh->vertex(tri.v[0]); // first vertex repeated so that edge i is always v[i] -> v[i+1]
- inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]); // reciprocal of each edge vector; splitPolygon reads component [dim]
- inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
- inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]); // v[0] holds the same vertex as v[3]
- }
-
- __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { // PrimRef variant: does not use inv_length
- splitPolygon<3>(prim,dim,pos,v,left_o,right_o);
- }
-
- __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const { // box variant: uses the cached inv_length
- splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o);
- }
-
- private:
- Vec3fa v[4]; // the three triangle vertices plus a copy of the first one
- Vec3fa inv_length[3]; // one entry per edge
- };
-
- struct TriangleSplitterFactory // function object that builds a TriangleSplitter for a given PrimRef
- {
- __forceinline TriangleSplitterFactory(const Scene* scene) // stores the scene pointer; no other work is done here
- : scene(scene) {}
-
- __forceinline TriangleSplitter operator() (const PrimRef& prim) const { // returns a new splitter for prim by value
- return TriangleSplitter(scene,prim);
- }
-
- private:
- const Scene* scene; // scene passed on to every TriangleSplitter created
- };
-
- struct QuadSplitter // caches the vertices of one quad and splits it via splitPolygon<4>
- {
- __forceinline QuadSplitter(const Scene* scene, const PrimRef& prim) // looks up the quad referenced by prim in scene
- {
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; // clears the top RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS bits of the geomID
- const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );
- QuadMesh::Quad quad = mesh->quad(prim.primID());
- v[0] = mesh->vertex(quad.v[0]);
- v[1] = mesh->vertex(quad.v[1]);
- v[2] = mesh->vertex(quad.v[2]);
- v[3] = mesh->vertex(quad.v[3]);
- v[4] = mesh->vertex(quad.v[0]); // first vertex repeated so that edge i is always v[i] -> v[i+1]
- inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]); // reciprocal of each edge vector; splitPolygon reads component [dim]
- inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
- inv_length[2] = Vec3fa(1.0f) / (v[3]-v[2]);
- inv_length[3] = Vec3fa(1.0f) / (v[0]-v[3]); // v[0] holds the same vertex as v[4]
- }
-
- __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { // PrimRef variant: does not use inv_length
- splitPolygon<4>(prim,dim,pos,v,left_o,right_o);
- }
-
- __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const { // box variant: uses the cached inv_length
- splitPolygon<4>(prim,dim,pos,v,inv_length,left_o,right_o);
- }
-
- private:
- Vec3fa v[5]; // the four quad vertices plus a copy of the first one
- Vec3fa inv_length[4]; // one entry per edge
- };
-
- struct QuadSplitterFactory // function object that builds a QuadSplitter for a given PrimRef
- {
- __forceinline QuadSplitterFactory(const Scene* scene) // stores the scene pointer; no other work is done here
- : scene(scene) {}
-
- __forceinline QuadSplitter operator() (const PrimRef& prim) const { // returns a new splitter for prim by value
- return QuadSplitter(scene,prim);
- }
-
- private:
- const Scene* scene; // scene passed on to every QuadSplitter created
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp
deleted file mode 100644
index bd102bd6ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_statistics.h"
-
-namespace embree
-{
- template<int N>
- BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene) // creates an empty BVH bound to scene and its device
- : AccelData((N==4) ? AccelData::TY_BVH4 : (N==8) ? AccelData::TY_BVH8 : AccelData::TY_UNKNOWN), // type tag derived from the branching factor N
- primTy(&primTy), device(scene->device), scene(scene), // primTy is kept by address, so the referenced object must outlive this BVH
- root(emptyNode), alloc(scene->device,scene->isStaticAccel()), numPrimitives(0), numVertices(0) // starts with an empty root and zeroed counters
- {
- }
-
- template<int N>
- BVHN<N>::~BVHN () // deletes every BVH pointer stored in the objects array
- {
- for (size_t i=0; i<objects.size(); i++)
- delete objects[i]; // entries may be null (postBuild checks for that); delete on a null pointer is a no-op
- }
-
- template<int N>
- void BVHN<N>::clear() // resets the BVH to the empty state and clears its allocator
- {
- set(BVHN::emptyNode,empty,0); // empty root, empty bounds, zero primitives
- alloc.clear();
- }
-
- template<int N>
- void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives) // stores the three build results in the matching members
- {
- this->root = root;
- this->bounds = bounds; // bounds is not declared in BVHN itself; presumably inherited from AccelData
- this->numPrimitives = numPrimitives;
- }
-
- template<int N>
- void BVHN<N>::clearBarrier(NodeRef& node) // clears the first barrier found on each path below node
- {
- if (node.isBarrier())
- node.clearBarrier(); // recursion stops here; nodes below a barrier are not visited
- else if (!node.isLeaf()) {
- BaseNode* n = node.baseNode(); // FIXME: flags should be stored in BVH
- for (size_t c=0; c<N; c++)
- clearBarrier(n->child(c)); // visits all N child slots
- }
- }
-
- template<int N>
- void BVHN<N>::layoutLargeNodes(size_t num) // sets barriers on a frontier of large-area nodes, then re-allocates the AABB nodes above that frontier
- {
-#if defined(__X86_64__) || defined(__aarch64__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
- struct NodeArea // heap entry: pointer to a node reference plus the key used for ordering
- {
- __forceinline NodeArea() {}
-
- __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
- : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {} // leaves get -inf so they order below every inner node
-
- __forceinline bool operator< (const NodeArea& other) const { // with the std heap functions this puts the largest area on top
- return this->A < other.A;
- }
-
- NodeRef* node;
- float A;
- };
- std::vector<NodeArea> lst;
- lst.reserve(num);
- lst.push_back(NodeArea(root,empty)); // seeded with the root; it is the only entry, so its key does not affect the first pop
-
- while (lst.size() < num) // each step replaces the top entry by its non-empty children
- {
- std::pop_heap(lst.begin(), lst.end());
- NodeArea n = lst.back(); lst.pop_back();
- if (!n.node->isAABBNode()) break; // stops at the first non-AABB top entry; that entry is not put back into lst
- AABBNode* node = n.node->getAABBNode();
- for (size_t i=0; i<N; i++) {
- if (node->child(i) == BVHN::emptyNode) continue;
- lst.push_back(NodeArea(node->child(i),node->bounds(i)));
- std::push_heap(lst.begin(), lst.end());
- }
- }
-
- for (size_t i=0; i<lst.size(); i++)
- lst[i].node->setBarrier(); // every remaining entry becomes a stop marker for the recursion below
-
- root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
-#endif
- }
-
- template<int N>
- typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator) // returns the reference to use in place of node
- {
- if (node.isBarrier()) { // barrier: clear the mark and keep this subtree where it is
- node.clearBarrier();
- return node;
- }
- else if (node.isAABBNode()) // AABB node above the barriers: copy it into newly allocated memory
- {
- AABBNode* oldnode = node.getAABBNode();
- AABBNode* newnode = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
- *newnode = *oldnode;
- for (size_t c=0; c<N; c++)
- newnode->child(c) = layoutLargeNodesRecursion(oldnode->child(c),allocator); // the copy's children are replaced by the recursively processed ones
- return encodeNode(newnode);
- }
- else return node; // any other node type is returned unchanged
- }
-
- template<int N>
- double BVHN<N>::preBuild(const std::string& builderName) // returns the value that has to be passed to postBuild()
- {
- if (builderName == "")
- return inf; // sentinel: postBuild() returns immediately when it receives inf
-
- if (device->verbosity(2))
- {
- Lock<MutexSys> lock(g_printMutex); // held while printing
- std::cout << "building BVH" << N << (builderName.find("MBlur") != std::string::npos ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
- }
-
- double t0 = 0.0; // stays 0.0 unless benchmark mode or verbosity(2) is active
- if (device->benchmark || device->verbosity(2)) t0 = getSeconds();
- return t0;
- }
-
- template<int N>
- void BVHN<N>::postBuild(double t0) // prints build statistics and/or a benchmark line; t0 is the value returned by preBuild()
- {
- if (t0 == double(inf)) // preBuild() returns inf for an empty builder name: nothing to report
- return;
-
- double dt = 0.0; // elapsed build time; stays 0.0 when neither benchmark nor verbosity(2) is active
- if (device->benchmark || device->verbosity(2))
- dt = getSeconds()-t0;
-
- std::unique_ptr<BVHNStatistics<N>> stat; // created on first use and shared by the two report sections below
-
- /* print statistics */
- if (device->verbosity(2))
- {
- if (!stat) stat.reset(new BVHNStatistics<N>(this));
- const size_t usedBytes = alloc.getUsedBytes();
- Lock<MutexSys> lock(g_printMutex); // held until the end of this block
- std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;
-
- if (device->verbosity(2)) // same condition as the enclosing if
- std::cout << stat->str();
-
- if (device->verbosity(2)) // same condition as the enclosing if
- {
- FastAllocator::AllStatistics stat(&alloc); // note: shadows the outer unique_ptr named stat inside this scope
- for (size_t i=0; i<objects.size(); i++)
- if (objects[i])
- stat = stat + FastAllocator::AllStatistics(&objects[i]->alloc); // adds the allocator statistics of each non-null entry of objects
-
- stat.print(numPrimitives);
- }
-
- if (device->verbosity(3))
- {
- alloc.print_blocks();
- for (size_t i=0; i<objects.size(); i++)
- if (objects[i])
- objects[i]->alloc.print_blocks();
- }
-
- std::cout << std::flush;
- }
-
- /* benchmark mode */
- if (device->benchmark)
- {
- if (!stat) stat.reset(new BVHNStatistics<N>(this));
- Lock<MutexSys> lock(g_printMutex);
- std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
- }
- }
-
-#if defined(__AVX__) // BVH8 is only instantiated when compiling with AVX
- template class BVHN<8>;
-#endif
-
-#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__) // && binds tighter than ||: no AVX, or neither SSE target defined, or aarch64
- template class BVHN<4>;
-#endif
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.h b/thirdparty/embree-aarch64/kernels/bvh/bvh.h
deleted file mode 100644
index 8fdf912e52..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh.h
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-/* include all node types */
-#include "bvh_node_aabb.h"
-#include "bvh_node_aabb_mb.h"
-#include "bvh_node_aabb_mb4d.h"
-#include "bvh_node_obb.h"
-#include "bvh_node_obb_mb.h"
-#include "bvh_node_qaabb.h"
-
-namespace embree
-{
- /*! flags used to enable specific node types in intersectors; each flag is a distinct bit so they can be OR-ed together */
- enum BVHNodeFlags
- {
- BVH_FLAG_ALIGNED_NODE = 0x00001,
- BVH_FLAG_ALIGNED_NODE_MB = 0x00010,
- BVH_FLAG_UNALIGNED_NODE = 0x00100,
- BVH_FLAG_UNALIGNED_NODE_MB = 0x01000,
- BVH_FLAG_QUANTIZED_NODE = 0x100000,
- BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000,
-
- /* short versions: aliases and OR-combinations of the flags above */
- BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
- BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
- BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
- BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
- BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
- BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D, // all three *_MB* flags
- BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
- BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
- BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB, // same value as BVH_MB
- BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
- };
-
- /*! Multi BVH with N children. Each node stores the bounding box of
- * its N children as well as N child references. */
- template<int N>
- class BVHN : public AccelData
- {
- ALIGNED_CLASS_(16);
- public:
-
- /*! node reference type and the node types specialized for N children */
- typedef NodeRefPtr<N> NodeRef;
- typedef BaseNode_t<NodeRef,N> BaseNode;
- typedef AABBNode_t<NodeRef,N> AABBNode;
- typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
- typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
- typedef OBBNode_t<NodeRef,N> OBBNode;
- typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
- typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
- typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
- typedef QuantizedNode_t<NodeRef,N> QuantizedNode;
-
- /*! Number of bytes the nodes and primitives are minimally aligned to.*/
- static const size_t byteAlignment = 16;
- static const size_t byteNodeAlignment = 4*N; // 16 bytes for N=4, 32 bytes for N=8
-
- /*! Empty node */
- static const size_t emptyNode = NodeRef::emptyNode;
-
- /*! Invalid node, used as marker in traversal */
- static const size_t invalidNode = NodeRef::invalidNode;
- static const size_t popRay = NodeRef::popRay;
-
- /*! Maximum depth of the BVH. */
- static const size_t maxBuildDepth = 32;
- static const size_t maxBuildDepthLeaf = maxBuildDepth+8; // evaluates to 40
- static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder
-
- /*! Maximum number of primitive blocks in a leaf. */
- static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;
-
- public:
-
- /*! Builder interface to create allocator */
- struct CreateAlloc : public FastAllocator::Create {
- __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {} // wraps the allocator owned by bvh
- };
-
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- public:
-
- /*! BVHN constructor; creates an empty BVH for the given primitive type and scene. */
- BVHN (const PrimitiveType& primTy, Scene* scene);
-
- /*! BVHN destruction */
- ~BVHN ();
-
- /*! clears the acceleration structure */
- void clear();
-
- /*! sets BVH members after build */
- void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);
-
- /*! Clears the barrier bits of a subtree. */
- void clearBarrier(NodeRef& node);
-
- /*! lays out num large nodes of the BVH */
- void layoutLargeNodes(size_t num);
- NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);
-
- /*! called by all builders before build starts */
- double preBuild(const std::string& builderName);
-
- /*! called by all builders after build ended */
- void postBuild(double t0);
-
- /*! allocator class */
- struct Allocator {
- BVHN* bvh;
- Allocator (BVHN* bvh) : bvh(bvh) {}
- __forceinline void* operator() (size_t bytes) const { // allocates bytes through the thread-local allocator of bvh->alloc
- return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
- }
- };
-
- /*! post build cleanup */
- void cleanup() {
- alloc.cleanup();
- }
-
- public:
-
- /*! Encodes a node; every overload forwards to the matching NodeRef function */
- static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
- static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
- static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }
-
- public:
-
- /*! Prefetches the node this reference points to, in steps of 64 bytes; how many steps depends on N and types */
- __forceinline static void prefetch(const NodeRef ref, int types=0)
- {
-#if defined(__AVX512PF__) // MIC
- if (types != BVH_FLAG_QUANTIZED_NODE) {
- prefetchL2(((char*)ref.ptr)+0*64);
- prefetchL2(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) { // numeric comparison: true for any flag value above the plain aligned-node flag
- prefetchL2(((char*)ref.ptr)+2*64);
- prefetchL2(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
- /* KNL still needs L2 prefetches for large nodes */
- prefetchL2(((char*)ref.ptr)+4*64);
- prefetchL2(((char*)ref.ptr)+5*64);
- prefetchL2(((char*)ref.ptr)+6*64);
- prefetchL2(((char*)ref.ptr)+7*64);
- }
- }
- else
- {
- /* todo: reduce if 32bit offsets are enabled */
- prefetchL2(((char*)ref.ptr)+0*64);
- prefetchL2(((char*)ref.ptr)+1*64);
- prefetchL2(((char*)ref.ptr)+2*64);
- }
-#else
- if (types != BVH_FLAG_QUANTIZED_NODE) {
- prefetchL1(((char*)ref.ptr)+0*64);
- prefetchL1(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) { // numeric comparison: true for any flag value above the plain aligned-node flag
- prefetchL1(((char*)ref.ptr)+2*64);
- prefetchL1(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) { // intentionally empty, see the comment below
- /* deactivate for large nodes on Xeon, as it introduces regressions */
- //prefetchL1(((char*)ref.ptr)+4*64);
- //prefetchL1(((char*)ref.ptr)+5*64);
- //prefetchL1(((char*)ref.ptr)+6*64);
- //prefetchL1(((char*)ref.ptr)+7*64);
- }
- }
- else
- {
- /* todo: reduce if 32bit offsets are enabled */
- prefetchL1(((char*)ref.ptr)+0*64);
- prefetchL1(((char*)ref.ptr)+1*64);
- prefetchL1(((char*)ref.ptr)+2*64);
- }
-#endif
- }
-
- __forceinline static void prefetchW(const NodeRef ref, int types=0) // prefetchEX variant; unlike prefetch() it has no special case for quantized nodes
- {
- embree::prefetchEX(((char*)ref.ptr)+0*64);
- embree::prefetchEX(((char*)ref.ptr)+1*64);
- if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
- embree::prefetchEX(((char*)ref.ptr)+2*64);
- embree::prefetchEX(((char*)ref.ptr)+3*64);
- }
- if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
- embree::prefetchEX(((char*)ref.ptr)+4*64);
- embree::prefetchEX(((char*)ref.ptr)+5*64);
- embree::prefetchEX(((char*)ref.ptr)+6*64);
- embree::prefetchEX(((char*)ref.ptr)+7*64);
- }
- }
-
- /*! bvh type information */
- public:
- const PrimitiveType* primTy; //!< primitive type stored in the BVH
-
- /*! bvh data */
- public:
- Device* device; //!< device pointer
- Scene* scene; //!< scene pointer
- NodeRef root; //!< root node
- FastAllocator alloc; //!< allocator used to allocate nodes
-
- /*! statistics data */
- public:
- size_t numPrimitives; //!< number of primitives the BVH is built over
- size_t numVertices; //!< number of vertices the BVH references
-
- /*! data arrays for special builders */
- public:
- std::vector<BVHN*> objects; //!< BVH pointers owned by this BVH (deleted in the destructor); entries may be null
- vector_t<char,aligned_allocator<char,32>> subdiv_patches;
- };
-
- typedef BVHN<4> BVH4; // BVH with 4 children per node
- typedef BVHN<8> BVH8; // BVH with 8 children per node
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp
deleted file mode 100644
index 23f4f63d45..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp
+++ /dev/null
@@ -1,1325 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh4_factory.h"
-#include "../bvh/bvh.h"
-
-#include "../geometry/curveNv.h"
-#include "../geometry/curveNi.h"
-#include "../geometry/curveNi_mb.h"
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/subdivpatch1.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-#include "../common/accelinstance.h"
-
-namespace embree
-{
- DECLARE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom); // selected with ifeatures in the BVH4Factory constructor
-
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4i,void); // NOTE(review): arguments look like (return type, name, parameter list) - confirm against the macro definition
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8i,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector4iMB,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1); // Accel::Intersector1 symbols; each is selected under "select intersectors1" in selectIntersectors
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker); // QBVH4-prefixed symbols are the only Intersector1 entries not named BVH4*
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
-
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid); // Accel::Intersector4 symbols; declared unconditionally, but selected in selectIntersectors only under #if defined (EMBREE_RAY_PACKETS)
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid); // Accel::Intersector8 symbols; selected in selectIntersectors with the SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX macro, under #if defined (EMBREE_RAY_PACKETS)
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid); // Accel::Intersector16 symbols; selected in selectIntersectors with the SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX macro, under #if defined (EMBREE_RAY_PACKETS)
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4IntersectorStreamPacketFallback); // Accel::IntersectorN symbols; selected under "select stream intersectors" in selectIntersectors; this one is selected without an IF_ENABLED_* wrapper
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool); // Builder* declarations; NOTE(review): COMMA presumably expands to ',' so a parameter list can pass as one macro argument - confirm against its definition
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool); // the only two-level builder with an extra Geometry::GTypeMask parameter
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask); // instance scene builders take a Geometry::GTypeMask where the other scene builders take size_t
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t); // the *MeshRefitSAH declarations take a mesh pointer, an unsigned int and a size_t; none of them is selected in selectBuilders
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
-
- BVH4Factory::BVH4Factory(int bfeatures, int ifeatures) // bfeatures is forwarded to selectBuilders, ifeatures to selectIntersectors
- {
- SELECT_SYMBOL_DEFAULT_AVX_AVX2(ifeatures,BVH4ColliderUserGeom); // the collider symbol is selected with ifeatures, not bfeatures
-
- selectBuilders(bfeatures);
- selectIntersectors(ifeatures);
- }
-
- void BVH4Factory::selectBuilders(int features) // runs one SELECT_SYMBOL_* macro per builder symbol, each wrapped in an IF_ENABLED_* macro for its geometry kind
- {
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4MeshSAH)); // two-level builders: all use the DEFAULT_AVX_AVX512KNL selector
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4iMeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4vMeshSAH));
- IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelQuadMeshSAH));
- IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelVirtualSAH));
- IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelInstanceSAH));
-
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New)); // curve builders
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4OBBCurve4iMBBuilder_OBB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4Curve8iBuilder_OBB_New)); // the two Curve8i builders use INIT_AVX instead of DEFAULT_AVX; NOTE(review): presumably no default-ISA fallback exists for them - confirm against the macro definitions
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4OBBCurve8iMBBuilder_OBB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4SceneBuilderSAH)); // triangle scene builders; the MB and Quantized variants below use the plain DEFAULT_AVX selector
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4vSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedTriangle4iSceneBuilderSAH));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4vSceneBuilderSAH)); // quad scene builders; same selector split as the triangle group
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4iSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4iMBSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedQuad4iSceneBuilderSAH));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4SceneBuilderFastSpatialSAH)); // FastSpatialSAH builders
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vSceneBuilderFastSpatialSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iSceneBuilderFastSpatialSAH));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4vSceneBuilderFastSpatialSAH));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4VirtualSceneBuilderSAH)); // user-geometry (Virtual) scene builders
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4VirtualMBSceneBuilderSAH));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4InstanceSceneBuilderSAH)); // instance scene builders
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH)); // grid scene builders
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1BuilderSAH)); // subdivision patch builders
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1MBBuilderSAH));
- }
-
- void BVH4Factory::selectIntersectors(int features)
- {
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4i));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8i));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4iMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB));
-
- /* select intersectors1 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1MB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1MB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4vIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Moeller));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Triangle4iIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Quad4iIntersector1Pluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector1));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector1));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector1));
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector1));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector1));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector1));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Moeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector1Moeller))
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Pluecker));
-
-#if defined (EMBREE_RAY_PACKETS)
-
- /* select intersectors4 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector4));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector4));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector4Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector4Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector4Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector4Chunk));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridPluecker));
-
- /* select intersectors8 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector8));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector8));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector8Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector8Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector8Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector8Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridPluecker));
-
- /* select intersectors16 */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1Intersector16));
- IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1MBIntersector16));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersector16Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualMBIntersector16Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersector16Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceMBIntersector16Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridMBIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridPluecker));
-
- /* select stream intersectors */
- SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4IntersectorStreamPacketFallback);
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersectorStreamPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersectorStream));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersectorStream));
-
-#endif
- }
-
- Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH4OBBVirtualCurveIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4Hybrid();
- intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8Hybrid();
- intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16Hybrid();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4Hybrid();
- intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8Hybrid();
- intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16Hybrid();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- default: assert(false);
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH4OBBVirtualCurveIntersector1MB();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4HybridMB();
- intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8HybridMB();
- intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16HybridMB();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH4OBBVirtualCurveIntersectorRobust1MB();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4HybridMB();
- intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8HybridMB();
- intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16HybridMB();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- default: assert(false);
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- assert(ivariant == IntersectVariant::FAST);
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4Intersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH4Triangle4Intersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH4Triangle4Intersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH4Triangle4Intersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH4Triangle4Intersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH4Triangle4Intersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH4Triangle4Intersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH4Triangle4IntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH4Triangle4IntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- assert(ivariant == IntersectVariant::ROBUST);
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Triangle4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4Triangle4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4Triangle4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4Triangle4iIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4Triangle4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4Triangle4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Triangle4iIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4Triangle4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4Triangle4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4vMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4vMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Triangle4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4vIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH4Quad4vIntersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH4Quad4vIntersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH4Quad4vIntersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH4Quad4vIntersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH4Quad4vIntersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH4Quad4vIntersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH4Quad4vIntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH4Quad4vIntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Quad4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Quad4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4Quad4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4Quad4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Quad4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4Quad4iIntersector8HybridMoeller();
- intersectors.intersector16= BVH4Quad4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4Quad4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Quad4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Quad4iIntersector8HybridPluecker();
- intersectors.intersector16= BVH4Quad4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4Quad4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridMoeller();
- intersectors.intersector16= BVH4Quad4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4Quad4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridPluecker();
- intersectors.intersector16= BVH4Quad4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH4Factory::QBVH4Triangle4iIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH4Triangle4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::QBVH4Quad4iIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH4Quad4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4UserGeometryIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4VirtualIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4VirtualIntersector4Chunk();
- intersectors.intersector8 = BVH4VirtualIntersector8Chunk();
- intersectors.intersector16 = BVH4VirtualIntersector16Chunk();
- intersectors.intersectorN = BVH4VirtualIntersectorStream();
-#endif
- intersectors.collider = BVH4ColliderUserGeom();
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4UserGeometryMBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4VirtualMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4VirtualMBIntersector4Chunk();
- intersectors.intersector8 = BVH4VirtualMBIntersector8Chunk();
- intersectors.intersector16 = BVH4VirtualMBIntersector16Chunk();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4InstanceIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4InstanceIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4InstanceIntersector4Chunk();
- intersectors.intersector8 = BVH4InstanceIntersector8Chunk();
- intersectors.intersector16 = BVH4InstanceIntersector16Chunk();
- intersectors.intersectorN = BVH4InstanceIntersectorStream();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4InstanceMBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4InstanceMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4InstanceMBIntersector4Chunk();
- intersectors.intersector8 = BVH4InstanceMBIntersector8Chunk();
- intersectors.intersector16 = BVH4InstanceMBIntersector16Chunk();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4SubdivPatch1Intersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4SubdivPatch1Intersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4SubdivPatch1Intersector4();
- intersectors.intersector8 = BVH4SubdivPatch1Intersector8();
- intersectors.intersector16 = BVH4SubdivPatch1Intersector16();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4SubdivPatch1MBIntersectors(BVH4* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4SubdivPatch1MBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4SubdivPatch1MBIntersector4();
- intersectors.intersector8 = BVH4SubdivPatch1MBIntersector8();
- intersectors.intersector16 = BVH4SubdivPatch1MBIntersector16();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4i::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4i(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4iBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
-#if defined(EMBREE_TARGET_SIMD8)
- Accel* BVH4Factory::BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve8i::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8i(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve8iBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-#endif
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4v::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector4v(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4Curve4vBuilder_OBB_New(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve4iMB::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector4iMB(),ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve4iMBBuilder_OBB(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
-#if defined(EMBREE_TARGET_SIMD8)
- Accel* BVH4Factory::BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Curve8iMB::type,scene);
- Accel::Intersectors intersectors = BVH4OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(), ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->hair_builder == "default" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0);
- else if (scene->device->hair_builder == "sah" ) builder = BVH4OBBCurve8iMBBuilder_OBB(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->hair_builder+" for BVH4OBB<VirtualCurve8iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-#endif
-
- Accel* BVH4Factory::BVH4Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4Intersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4Intersectors(accel,IntersectVariant::FAST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4SceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4MeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4v::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4vIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4vIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4vSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4vMeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser == "default") intersectors = BVH4Triangle4iIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser == "fast" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser == "robust" ) intersectors = BVH4Triangle4iIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser+" for BVH4<Triangle4i>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default" ) {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial" ) builder = BVH4Triangle4iSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH4Triangle4iSceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH4BuilderTwoLevelTriangle4iMeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH4<Triangle4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4iMBIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4iMBIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4iMB>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Triangle4vMB::type,scene);
-
- Accel::Intersectors intersectors;
- if (scene->device->tri_traverser_mb == "default") intersectors = BVH4Triangle4vMBIntersectors(accel,ivariant);
- else if (scene->device->tri_traverser_mb == "fast" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::FAST);
- else if (scene->device->tri_traverser_mb == "robust" ) intersectors = BVH4Triangle4vMBIntersectors(accel,IntersectVariant::ROBUST);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown traverser "+scene->device->tri_traverser_mb+" for BVH4<Triangle4vMB>");
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH4Triangle4vMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH4<Triangle4vMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4v::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4vIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->quad_builder == "sah" ) builder = BVH4Quad4vSceneBuilderSAH(accel,scene,0);
- else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH4Quad4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->quad_builder == "dynamic" ) builder = BVH4BuilderTwoLevelQuadMeshSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else if (scene->device->quad_builder == "sah") builder = BVH4Quad4iSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH4<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH4Quad4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->quad_builder_mb == "sah") builder = BVH4Quad4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH4<Quad4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4QuantizedQuad4i(Scene* scene)
- {
- BVH4* accel = new BVH4(Quad4i::type,scene);
- Builder* builder = BVH4QuantizedQuad4iSceneBuilderSAH(accel,scene,0);
- Accel::Intersectors intersectors = QBVH4Quad4iIntersectors(accel);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4QuantizedTriangle4i(Scene* scene)
- {
- BVH4* accel = new BVH4(Triangle4i::type,scene);
- Builder* builder = BVH4QuantizedTriangle4iSceneBuilderSAH(accel,scene,0);
- Accel::Intersectors intersectors = QBVH4Triangle4iIntersectors(accel);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4SubdivPatch1(Scene* scene)
- {
- BVH4* accel = new BVH4(SubdivPatch1::type,scene);
- Accel::Intersectors intersectors = BVH4SubdivPatch1Intersectors(accel);
- Builder* builder = BVH4SubdivPatch1BuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4SubdivPatch1MB(Scene* scene)
- {
- BVH4* accel = new BVH4(SubdivPatch1::type,scene);
- Accel::Intersectors intersectors = BVH4SubdivPatch1MBIntersectors(accel);
- Builder* builder = BVH4SubdivPatch1MBBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4UserGeometry(Scene* scene, BuildVariant bvariant)
- {
- BVH4* accel = new BVH4(Object::type,scene);
- Accel::Intersectors intersectors = BVH4UserGeometryIntersectors(accel);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4VirtualSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH4VirtualSceneBuilderSAH(accel,scene,0);
- else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelVirtualSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4UserGeometryMB(Scene* scene)
- {
- BVH4* accel = new BVH4(Object::type,scene);
- Accel::Intersectors intersectors = BVH4UserGeometryMBIntersectors(accel);
- Builder* builder = BVH4VirtualMBSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant)
- {
- BVH4* accel = new BVH4(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH4InstanceIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP;
- // Builder* builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype); break;
- case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH4InstanceSceneBuilderSAH(accel,scene,gtype);
- else if (scene->device->object_builder == "dynamic") builder = BVH4BuilderTwoLevelInstanceSAH(accel,scene,gtype,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4InstanceMB(Scene* scene, bool isExpensive)
- {
- BVH4* accel = new BVH4(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH4InstanceMBIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE_CHEAP;
- Builder* builder = BVH4InstanceMBSceneBuilderSAH(accel,scene,gtype);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel::Intersectors BVH4Factory::BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- if (ivariant == IntersectVariant::FAST)
- {
- intersectors.intersector1 = BVH4GridIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4GridIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4GridIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- }
- else /* if (ivariant == IntersectVariant::ROBUST) */
- {
- intersectors.intersector1 = BVH4GridIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridIntersector4HybridPluecker();
- intersectors.intersector8 = BVH4GridIntersector8HybridPluecker();
- intersectors.intersector16 = BVH4GridIntersector16HybridPluecker();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- }
- return intersectors;
- }
-
- Accel::Intersectors BVH4Factory::BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH4GridMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH4GridMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH4GridMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH4GridMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH4Factory::BVH4Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(SubGridQBVH4::type,scene);
- Accel::Intersectors intersectors = BVH4GridIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- builder = BVH4GridSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4<GridMesh>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH4Factory::BVH4GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH4* accel = new BVH4(SubGridQBVH4::type,scene);
- Accel::Intersectors intersectors = BVH4GridMBIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- builder = BVH4GridMBSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->grid_builder+" for BVH4MB<GridMesh>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h
deleted file mode 100644
index a68227b41f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h
+++ /dev/null
@@ -1,316 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_factory.h"
-
-namespace embree
-{
- /*! BVH4 instantiations */
- class BVH4Factory : public BVHFactory
- {
- public:
- BVH4Factory(int bfeatures, int ifeatures);
-
- public:
- Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
- Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
-
- Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
- Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH4QuantizedTriangle4i(Scene* scene);
- Accel* BVH4QuantizedQuad4i(Scene* scene);
-
- Accel* BVH4SubdivPatch1(Scene* scene);
- Accel* BVH4SubdivPatch1MB(Scene* scene);
-
- Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH4UserGeometryMB(Scene* scene);
-
- Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
-
- Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- private:
- void selectBuilders(int features);
- void selectIntersectors(int features);
-
- private:
- Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
- Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
-
- Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
- Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
- Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
- Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
- Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
-
- Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
-
- private:
-
- DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
-
- // ==============
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4IntersectorStreamPacketFallback);
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
-
- // SAH scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- // spatial scene builder
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- // twolevel scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp
deleted file mode 100644
index 9fe057c392..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp
+++ /dev/null
@@ -1,1165 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "../common/isa.h" // to define EMBREE_TARGET_SIMD8
-
-#if defined (EMBREE_TARGET_SIMD8)
-
-#include "bvh8_factory.h"
-#include "../bvh/bvh.h"
-
-#include "../geometry/curveNv.h"
-#include "../geometry/curveNi.h"
-#include "../geometry/curveNi_mb.h"
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/subdivpatch1.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-#include "../common/accelinstance.h"
-
-namespace embree
-{
- DECLARE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom); // collider symbol; initialised in the BVH8Factory constructor
- // ISA-dispatched functions returning VirtualCurveIntersector* (8v and 8iMB flavours)
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8v,void);
- DECLARE_ISA_FUNCTION(VirtualCurveIntersector*,VirtualCurveIntersector8iMB,void);
- // Accel::Intersector1 symbols -- OBB virtual curves: plain, MB, Robust, Robust+MB
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
- // Intersector1 -- Triangle4/4i with Moeller, Triangle4v/4i with Pluecker
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
- // Intersector1 -- Triangle4v Woop variant; no packet or stream counterpart appears in this declaration list
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
- // Intersector1 -- triangle MB variants (MB presumably means motion blur -- TODO confirm)
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
- // Intersector1 -- Quad4v/Quad4i, Moeller and Pluecker
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
- // Intersector1 -- Quad4i MB variants
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
- // Intersector1 -- QBVH8-prefixed symbols (presumably the quantized BVH8 layout -- TODO confirm)
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
- // Intersector1 -- Virtual and VirtualMB
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
- // Intersector1 -- Instance and InstanceMB
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
- // Intersector1 -- Grid (Moeller, MB Moeller, Pluecker)
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
- DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid); // Accel::Intersector4 symbols start here; first group: OBB virtual curves
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
- // Intersector4 -- triangles; Triangle4 Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
- // Intersector4 -- triangle MB variants
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
- // Intersector4 -- quads; Quad4v Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
- // Intersector4 -- Quad4i MB variants
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
- // Intersector4 -- Virtual/VirtualMB are named Chunk rather than Hybrid
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
- // Intersector4 -- Instance/InstanceMB, also Chunk
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
- // Intersector4 -- Grid (Moeller, Pluecker); no GridMB symbol is declared at this width
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid); // Accel::Intersector8 symbols start here; first group: OBB virtual curves
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
- // Intersector8 -- triangles; Triangle4 Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
- // Intersector8 -- triangle MB variants
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
- // Intersector8 -- quads; Quad4v Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
- // Intersector8 -- Quad4i MB variants
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
- // Intersector8 -- Virtual/VirtualMB are named Chunk rather than Hybrid
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
- // Intersector8 -- Instance/InstanceMB, also Chunk
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
- // Intersector8 -- Grid (Moeller, Pluecker); no GridMB symbol is declared at this width
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid); // Accel::Intersector16 symbols start here; first group: OBB virtual curves
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
- // Intersector16 -- triangles; Triangle4 Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
- // Intersector16 -- triangle MB variants
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
- // Intersector16 -- quads; Quad4v Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
- // Intersector16 -- Quad4i MB variants
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
- // Intersector16 -- Virtual/VirtualMB are named Chunk rather than Hybrid
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
- // Intersector16 -- Instance/InstanceMB, also Chunk
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
- // Intersector16 -- Grid (Moeller, Pluecker); no GridMB symbol is declared at this width
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
- DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
-
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback); // Accel::IntersectorN symbols start here; this fallback is what the curve intersector tables assign to intersectorN
- // IntersectorN -- triangle stream symbols; Triangle4 Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
- // IntersectorN -- quad stream symbols; Quad4v Moeller also has a NoFilter symbol
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
- // IntersectorN -- Virtual; no MB stream symbol is declared in this list
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
- // IntersectorN -- Instance; no MB stream symbol is declared in this list
- DECLARE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
-
- DECLARE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t); // builder functions start here, each returning Builder*; COMMA presumably expands to ',' so the argument list stays one macro argument
- DECLARE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
- // triangle scene builders (SAH), including MB and Quantized variants
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- // quad scene builders (SAH), including MB and Quantized variants
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- // Virtual / VirtualMB scene builders
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- // Instance builders take a Geometry::GTypeMask as third argument instead of size_t
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- // FastSpatialSAH builders (Triangle4, Triangle4v, Quad4v) followed by the Grid / GridMB builders
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- // two-level builders take a bool third argument; the Instance one takes GTypeMask and then bool
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
-
- BVH8Factory::BVH8Factory(int bfeatures, int ifeatures) // bfeatures is forwarded to selectBuilders(), ifeatures to selectIntersectors(); both are presumably ISA feature masks -- TODO confirm
- {
- SELECT_SYMBOL_INIT_AVX(ifeatures,BVH8ColliderUserGeom); // the collider symbol is selected from the intersector features, not the builder features
- // fill in the remaining builder and intersector symbols, each from its own feature argument
- selectBuilders(bfeatures);
- selectIntersectors(ifeatures);
- }
-
- void BVH8Factory::selectBuilders(int features) // runs a SELECT_SYMBOL_INIT_* macro for every BVH8 builder symbol, passing `features`
- { // each selection is wrapped in IF_ENABLED_<geometry>; presumably that drops it when the geometry type is compiled out (macros defined outside this file)
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8Curve8vBuilder_OBB_New));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8OBBCurve8iMBBuilder_OBB));
- // triangle scene builders: the two Quantized ones use SELECT_SYMBOL_INIT_AVX, the others SELECT_SYMBOL_INIT_AVX_AVX512KNL
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vMBSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4iSceneBuilderSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4SceneBuilderSAH));
- // quad scene builders: same split, the Quantized one uses SELECT_SYMBOL_INIT_AVX
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iMBSceneBuilderSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedQuad4iSceneBuilderSAH));
- // Virtual, Instance and Grid scene builders all use SELECT_SYMBOL_INIT_AVX
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualSceneBuilderSAH));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualMBSceneBuilderSAH));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceSceneBuilderSAH));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceMBSceneBuilderSAH));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH));
- // FastSpatialSAH builders
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderFastSpatialSAH));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderFastSpatialSAH));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderFastSpatialSAH));
- // two-level builders, one per geometry type
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4MeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4vMeshSAH));
- IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4iMeshSAH));
- IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelQuadMeshSAH));
- IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelVirtualSAH));
- IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelInstanceSAH));
- }
-
- void BVH8Factory::selectIntersectors(int features) // runs a SELECT_SYMBOL_INIT_* macro for every BVH8 intersector symbol, passing `features`
- { // sections: curve leaf intersectors, intersectors1, then (only with EMBREE_RAY_PACKETS) intersectors4/8/16 and stream
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB));
-
- /* select intersectors1 (this section sits outside the EMBREE_RAY_PACKETS guard) */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1MB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1MB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Woop));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Moeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Pluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Pluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Pluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4iIntersector1Pluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4Intersector1Moeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Quad4iIntersector1Pluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector1));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector1));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector1));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector1));
- // the grid intersectors1 use the selector without AVX512KNL, unlike the rest of this section
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Moeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridMBIntersector1Moeller)) // NOTE(review): no trailing ';' on this line, unlike its neighbours
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Pluecker));
-
-#if defined (EMBREE_RAY_PACKETS) // everything down to the matching #endif is compiled only with EMBREE_RAY_PACKETS
-
- /* select intersectors4 (SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX unless noted) */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridPluecker));
- // Quad4iMB at width 4 uses SELECT_SYMBOL_INIT_AVX_AVX2 (no AVX512SKX in the selector name)
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector4Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector4Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector4Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector4Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridPluecker));
-
- /* select intersectors8 (SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX unless noted) */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridPluecker));
- // Quad4iMB at width 8 uses SELECT_SYMBOL_INIT_AVX_AVX2 (no AVX512SKX in the selector name)
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector8Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector8Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector8Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector8Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridPluecker));
-
- /* select intersectors16 (every entry uses SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX) */
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16HybridMB));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16Hybrid));
- IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16HybridMB));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridPluecker));
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector16Chunk));
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector16Chunk));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector16Chunk));
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector16Chunk));
-
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridMoeller));
- IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridPluecker));
-
- /* select stream intersectors */
- // the packet fallback is the only selection in this function not wrapped in an IF_ENABLED_* macro
- SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8IntersectorStreamPacketFallback);
-
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoellerNoFilter));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamMoeller));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersectorStreamPluecker));
- IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamPluecker));
-
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoellerNoFilter));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamMoeller));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamPluecker));
- IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamPluecker));
-
- IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersectorStream));
-
- IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersectorStream));
-
-#endif // EMBREE_RAY_PACKETS
- }
-
- Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) // fills an Accel::Intersectors table for an OBB virtual-curve BVH8, choosing kernels by ivariant
- {
- switch (ivariant) {
- case IntersectVariant::FAST: // non-Robust kernels
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh; // table stores the BVH pointer and the caller-supplied leaf intersector
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1();
-#if defined (EMBREE_RAY_PACKETS) // packet and stream slots are assigned only in packet-enabled builds
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4Hybrid();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8Hybrid();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16Hybrid();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); // stream slot uses the packet fallback
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST: // same layout as FAST, with the Robust kernels
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4Hybrid();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8Hybrid();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16Hybrid();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); // the stream fallback is the same for FAST and ROBUST
-#endif
- return intersectors;
- }
- default: assert(false); // only FAST and ROBUST are handled
- }
- return Accel::Intersectors(); // reached only from the default branch when assert(false) does not abort (e.g. NDEBUG): default-constructed table
- }
-
- Accel::Intersectors BVH8Factory::BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant) // MB counterpart of the OBB virtual-curve table: same slots, MB-suffixed kernels
- {
- switch (ivariant) {
- case IntersectVariant::FAST: // non-Robust MB kernels
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh; // table stores the BVH pointer and the caller-supplied leaf intersector
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersector1MB();
-#if defined (EMBREE_RAY_PACKETS) // packet and stream slots are assigned only in packet-enabled builds
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4HybridMB();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8HybridMB();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16HybridMB();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback(); // stream slot uses the packet fallback (there is no MB-specific one here)
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST: // same layout as FAST, with the Robust MB kernels
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.leafIntersector = leafIntersector;
- intersectors.intersector1 = BVH8OBBVirtualCurveIntersectorRobust1MB();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4HybridMB();
- intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8HybridMB();
- intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16HybridMB();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- default: assert(false); // only FAST and ROBUST are handled
- }
- return Accel::Intersectors(); // reached only from the default branch when assert(false) does not abort (e.g. NDEBUG): default-constructed table
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- assert(ivariant == IntersectVariant::FAST);
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4Intersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH8Triangle4Intersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH8Triangle4Intersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH8Triangle4Intersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH8Triangle4Intersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH8Triangle4Intersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH8Triangle4Intersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH8Triangle4IntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH8Triangle4IntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
-#define ENABLE_WOOP_TEST 0
-#if ENABLE_WOOP_TEST == 0
- //assert(ivariant == IntersectVariant::ROBUST);
- intersectors.intersector1 = BVH8Triangle4vIntersector1Pluecker();
-#else
- intersectors.intersector1 = BVH8Triangle4vIntersector1Woop();
-#endif
-
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Triangle4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4iIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8Triangle4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4iIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Triangle4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4vMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4vMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Triangle4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4vIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4_filter = BVH8Quad4vIntersector4HybridMoeller();
- intersectors.intersector4_nofilter = BVH8Quad4vIntersector4HybridMoellerNoFilter();
- intersectors.intersector8_filter = BVH8Quad4vIntersector8HybridMoeller();
- intersectors.intersector8_nofilter = BVH8Quad4vIntersector8HybridMoellerNoFilter();
- intersectors.intersector16_filter = BVH8Quad4vIntersector16HybridMoeller();
- intersectors.intersector16_nofilter = BVH8Quad4vIntersector16HybridMoellerNoFilter();
- intersectors.intersectorN_filter = BVH8Quad4vIntersectorStreamMoeller();
- intersectors.intersectorN_nofilter = BVH8Quad4vIntersectorStreamMoellerNoFilter();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4vIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4vIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4vIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4vIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Quad4vIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Quad4iIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Quad4iIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8Quad4iIntersectorStreamMoeller();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4iIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4iIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8Quad4iIntersectorStreamPluecker();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- switch (ivariant) {
- case IntersectVariant::FAST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- case IntersectVariant::ROBUST:
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8Quad4iMBIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
- }
- return Accel::Intersectors();
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Triangle4iIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Triangle4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Triangle4Intersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Triangle4Intersector1Moeller();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::QBVH8Quad4iIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = QBVH8Quad4iIntersector1Pluecker();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8UserGeometryIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8VirtualIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8VirtualIntersector4Chunk();
- intersectors.intersector8 = BVH8VirtualIntersector8Chunk();
- intersectors.intersector16 = BVH8VirtualIntersector16Chunk();
- intersectors.intersectorN = BVH8VirtualIntersectorStream();
-#endif
- intersectors.collider = BVH8ColliderUserGeom();
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8UserGeometryMBIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8VirtualMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8VirtualMBIntersector4Chunk();
- intersectors.intersector8 = BVH8VirtualMBIntersector8Chunk();
- intersectors.intersector16 = BVH8VirtualMBIntersector16Chunk();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8InstanceIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8InstanceIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8InstanceIntersector4Chunk();
- intersectors.intersector8 = BVH8InstanceIntersector8Chunk();
- intersectors.intersector16 = BVH8InstanceIntersector16Chunk();
- intersectors.intersectorN = BVH8InstanceIntersectorStream();
-#endif
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8InstanceMBIntersectors(BVH8* bvh)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8InstanceMBIntersector1();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8InstanceMBIntersector4Chunk();
- intersectors.intersector8 = BVH8InstanceMBIntersector8Chunk();
- intersectors.intersector16 = BVH8InstanceMBIntersector16Chunk();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- return intersectors;
- }
-
- Accel* BVH8Factory::BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Curve8v::type,scene);
- Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectors(accel,VirtualCurveIntersector8v(),ivariant);
- Builder* builder = BVH8Curve8vBuilder_OBB_New(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Curve8iMB::type,scene);
- Accel::Intersectors intersectors = BVH8OBBVirtualCurveIntersectorsMB(accel,VirtualCurveIntersector8iMB(),ivariant);
- Builder* builder = BVH8OBBCurve8iMBBuilder_OBB(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4Intersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah" ) builder = BVH8Triangle4SceneBuilderSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else if (scene->device->tri_builder == "sah_presplit") builder = BVH8Triangle4SceneBuilderSAH(accel,scene,MODE_HIGH_QUALITY);
- else if (scene->device->tri_builder == "dynamic" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,false);
- else if (scene->device->tri_builder == "morton" ) builder = BVH8BuilderTwoLevelTriangle4MeshSAH(accel,scene,true);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4v::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4vIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4vMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Triangle4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->tri_builder == "sah_fast_spatial") builder = BVH8Triangle4SceneBuilderFastSpatialSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4v>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = BVH8Triangle4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelTriangle4iMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder+" for BVH8<Triangle4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = BVH8Triangle4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") { // FIXME: implement
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4iMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4iMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Triangle4vMB::type,scene);
- Accel::Intersectors intersectors= BVH8Triangle4vMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->tri_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->tri_builder_mb == "internal_time_splits") builder = BVH8Triangle4vMBSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->tri_builder_mb+" for BVH8<Triangle4vMB>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedTriangle4i(Scene* scene)
- {
- BVH8* accel = new BVH8(Triangle4i::type,scene);
- Accel::Intersectors intersectors = QBVH8Triangle4iIntersectors(accel);
- Builder* builder = BVH8QuantizedTriangle4iSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedTriangle4(Scene* scene)
- {
- BVH8* accel = new BVH8(Triangle4::type,scene);
- Accel::Intersectors intersectors = QBVH8Triangle4Intersectors(accel);
- Builder* builder = BVH8QuantizedTriangle4SceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4v(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4v::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4vIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4vSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0); break;
- }
- }
- else if (scene->device->quad_builder == "dynamic" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,false);
- else if (scene->device->quad_builder == "morton" ) builder = BVH8BuilderTwoLevelQuadMeshSAH(accel,scene,true);
- else if (scene->device->quad_builder == "sah_fast_spatial" ) builder = BVH8Quad4vSceneBuilderFastSpatialSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4v>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4i(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4iIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4iSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break; // FIXME: implement
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for BVH8<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Quad4iMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = BVH8Quad4iMBIntersectors(accel,ivariant);
-
- Builder* builder = nullptr;
- if (scene->device->quad_builder_mb == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8Quad4iMBSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : assert(false); break; // FIXME: implement
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder_mb+" for BVH8<Quad4i>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8QuantizedQuad4i(Scene* scene)
- {
- BVH8* accel = new BVH8(Quad4i::type,scene);
- Accel::Intersectors intersectors = QBVH8Quad4iIntersectors(accel);
- Builder* builder = nullptr;
- if (scene->device->quad_builder == "default" ) builder = BVH8QuantizedQuad4iSceneBuilderSAH(accel,scene,0);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->quad_builder+" for QBVH8<Quad4i>");
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8UserGeometry(Scene* scene, BuildVariant bvariant)
- {
- BVH8* accel = new BVH8(Object::type,scene);
- Accel::Intersectors intersectors = BVH8UserGeometryIntersectors(accel);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8VirtualSceneBuilderSAH(accel,scene,0); break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH8VirtualSceneBuilderSAH(accel,scene,0);
- else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelVirtualSAH(accel,scene,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8UserGeometryMB(Scene* scene)
- {
- BVH8* accel = new BVH8(Object::type,scene);
- Accel::Intersectors intersectors = BVH8UserGeometryMBIntersectors(accel);
- Builder* builder = BVH8VirtualMBSceneBuilderSAH(accel,scene,0);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant)
- {
- BVH8* accel = new BVH8(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH8InstanceIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE;
- // Builder* builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);
-
- Builder* builder = nullptr;
- if (scene->device->object_builder == "default") {
- switch (bvariant) {
- case BuildVariant::STATIC : builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);; break;
- case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false); break;
- case BuildVariant::HIGH_QUALITY: assert(false); break;
- }
- }
- else if (scene->device->object_builder == "sah") builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);
- else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelInstanceSAH(accel,scene,gtype,false);
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8InstanceMB(Scene* scene, bool isExpensive)
- {
- BVH8* accel = new BVH8(InstancePrimitive::type,scene);
- Accel::Intersectors intersectors = BVH8InstanceMBIntersectors(accel);
- auto gtype = isExpensive ? Geometry::MTY_INSTANCE_EXPENSIVE : Geometry::MTY_INSTANCE;
- Builder* builder = BVH8InstanceMBSceneBuilderSAH(accel,scene,gtype);
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel::Intersectors BVH8Factory::BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- if (ivariant == IntersectVariant::FAST)
- {
- intersectors.intersector1 = BVH8GridIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8GridIntersector4HybridMoeller();
- intersectors.intersector8 = BVH8GridIntersector8HybridMoeller();
- intersectors.intersector16 = BVH8GridIntersector16HybridMoeller();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- }
- else /* if (ivariant == IntersectVariant::ROBUST) */
- {
- intersectors.intersector1 = BVH8GridIntersector1Pluecker();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = BVH8GridIntersector4HybridPluecker();
- intersectors.intersector8 = BVH8GridIntersector8HybridPluecker();
- intersectors.intersector16 = BVH8GridIntersector16HybridPluecker();
- intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
-#endif
- }
- return intersectors;
- }
-
- Accel::Intersectors BVH8Factory::BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant)
- {
- Accel::Intersectors intersectors;
- intersectors.ptr = bvh;
- intersectors.intersector1 = BVH8GridMBIntersector1Moeller();
-#if defined (EMBREE_RAY_PACKETS)
- intersectors.intersector4 = nullptr;
- intersectors.intersector8 = nullptr;
- intersectors.intersector16 = nullptr;
- intersectors.intersectorN = nullptr;
-#endif
- return intersectors;
- }
-
- Accel* BVH8Factory::BVH8Grid(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(SubGridQBVH8::type,scene);
- Accel::Intersectors intersectors = BVH8GridIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->grid_builder == "default") {
- builder = BVH8GridSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<GridMesh>");
-
- return new AccelInstance(accel,builder,intersectors);
- }
-
- Accel* BVH8Factory::BVH8GridMB(Scene* scene, BuildVariant bvariant, IntersectVariant ivariant)
- {
- BVH8* accel = new BVH8(SubGridQBVH8::type,scene);
- Accel::Intersectors intersectors = BVH8GridMBIntersectors(accel,ivariant);
- Builder* builder = nullptr;
- if (scene->device->grid_builder_mb == "default") {
- builder = BVH8GridMBSceneBuilderSAH(accel,scene,0);
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8MB<GridMesh>");
- return new AccelInstance(accel,builder,intersectors);
- }
-}
-
-#endif
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h
deleted file mode 100644
index b92188e7d3..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_factory.h"
-
-namespace embree
-{
- /*! BVH8 instantiations */
- class BVH8Factory : public BVHFactory
- {
- public:
- BVH8Factory(int bfeatures, int ifeatures);
-
- public:
- Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
- Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
- DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
-
- Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- Accel* BVH8QuantizedTriangle4i(Scene* scene);
- Accel* BVH8QuantizedTriangle4(Scene* scene);
- Accel* BVH8QuantizedQuad4i(Scene* scene);
-
- Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH8UserGeometryMB(Scene* scene);
-
- Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
- Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
-
- Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
- Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
-
- private:
- void selectBuilders(int features);
- void selectIntersectors(int features);
-
- private:
- Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
- Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
-
- Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
- Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
- Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
- Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
- Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
-
- Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
- Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
-
- private:
- DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
-
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
- DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
-
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
- DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
-
- DEFINE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
-
- // SAH scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
- DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
-
- DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
-
- // SAH spatial scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
- DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
-
- // twolevel scene builders
- private:
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
- DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp
deleted file mode 100644
index e832537ec5..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_builder.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRef>
- (FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
-
- template<int N>
- typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRef>
- (FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
- template<int N>
- typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
- {
- auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRecordMB {
- return createLeaf(prims,set,alloc);
- };
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- return BVHBuilderBinnedSAH::build<NodeRecordMB>
- (FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),createLeafFunc,progressFunc,prims,pinfo,settings);
- }
-
- template struct BVHNBuilderVirtual<4>;
- template struct BVHNBuilderQuantizedVirtual<4>;
- template struct BVHNBuilderMblurVirtual<4>;
-
-#if defined(__AVX__)
- template struct BVHNBuilderVirtual<8>;
- template struct BVHNBuilderQuantizedVirtual<8>;
- template struct BVHNBuilderMblurVirtual<8>;
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h
deleted file mode 100644
index 1b86bb45ad..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "../builders/bvh_builder_sah.h"
-
-namespace embree
-{
- namespace isa
- {
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N>
- struct BVHNBuilderVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
- virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
- }
- };
-
- template<int N>
- struct BVHNBuilderQuantizedVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
- virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
- }
- };
-
- template<int N>
- struct BVHNBuilderMblurVirtual
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
- typedef FastAllocator::CachedAllocator Allocator;
-
- struct BVHNBuilderV {
- NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
- virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
- };
-
- template<typename CreateLeafFunc>
- struct BVHNBuilderT : public BVHNBuilderV
- {
- BVHNBuilderT (CreateLeafFunc createLeafFunc)
- : createLeafFunc(createLeafFunc) {}
-
- NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
- return createLeafFunc(prims,set,alloc);
- }
-
- private:
- CreateLeafFunc createLeafFunc;
- };
-
- template<typename CreateLeafFunc>
- static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
- return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp
deleted file mode 100644
index 64759c1294..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp
+++ /dev/null
@@ -1,531 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_statistics.h"
-#include "bvh_rotate.h"
-#include "../common/profile.h"
-#include "../../common/algorithms/parallel_prefix_sum.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/bvh_builder_morton.h"
-
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-#if defined(__X86_64__) || defined(__aarch64__)
-# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
-#else
-# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
-#endif
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- struct SetBVHNBounds
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
- typedef typename BVH::AABBNode AABBNode;
-
- BVH* bvh;
- __forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
- {
- AABBNode* node = ref.getAABBNode();
-
- BBox3fa res = empty;
- for (size_t i=0; i<num; i++) {
- const BBox3fa b = children[i].bounds;
- res.extend(b);
- node->setRef(i,children[i].ref);
- node->setBounds(i,b);
- }
-
- BBox3fx result = (BBox3fx&)res;
-#if ROTATE_TREE
- if (N == 4)
- {
- size_t n = 0;
- for (size_t i=0; i<num; i++)
- n += children[i].bounds.lower.a;
-
- if (n >= 4096) {
- for (size_t i=0; i<num; i++) {
- if (children[i].bounds.lower.a < 4096) {
- for (int j=0; j<ROTATE_TREE; j++)
- BVHNRotate<N>::rotate(node->child(i));
- node->child(i).setBarrier();
- }
- }
- }
- result.lower.a = unsigned(n);
- }
-#endif
-
- return NodeRecord(ref,result);
- }
- };
-
- template<int N, typename Primitive>
- struct CreateMortonLeaf;
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
- const TriangleMesh* __restrict__ const mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
-
- Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = unsigned(current.size());
-#endif
- return NodeRecord(ref,box_o);
- }
-
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4v>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
- const TriangleMesh* __restrict__ mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Triangle4i>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
-
- vuint4 v0 = zero, v1 = zero, v2 = zero;
- vuint4 vgeomID = -1, vprimID = -1;
- const TriangleMesh* __restrict__ const mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
- vgeomID[i] = geomID_;
- vprimID[i] = primID;
- unsigned int int_stride = mesh->vertices0.getStride()/4;
- v0[i] = tri.v[0] * int_stride;
- v1[i] = tri.v[1] * int_stride;
- v2[i] = tri.v[2] * int_stride;
- }
-
- for (size_t i=items; i<4; i++)
- {
- vgeomID[i] = vgeomID[0];
- vprimID[i] = -1;
- v0[i] = 0;
- v1[i] = 0;
- v2[i] = 0;
- }
- Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- TriangleMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Quad4v>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items<=4);
-
- /* allocate leaf node */
- Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,1);
-
- vuint4 vgeomID = -1, vprimID = -1;
- Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
- const QuadMesh* __restrict__ mesh = this->mesh;
-
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- const QuadMesh::Quad& tri = mesh->quad(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- const Vec3fa& p3 = mesh->vertex(tri.v[3]);
- lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
- upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
- vgeomID [i] = geomID_;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
- }
- Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
- BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- QuadMesh* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,Object>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
-
- /* allocate leaf node */
- Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,items);
- const UserGeometry* mesh = this->mesh;
-
- BBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- {
- const unsigned int index = morton[start+i].index;
- const unsigned int primID = index;
- bounds.extend(mesh->bounds(primID));
- new (&accel[i]) Object(geomID_,primID);
- }
-
- BBox3fx box_o = (BBox3fx&)bounds;
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- UserGeometry* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<int N>
- struct CreateMortonLeaf<N,InstancePrimitive>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- __forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
- : mesh(mesh), morton(morton), geomID_(geomID) {}
-
- __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
- {
- vfloat4 lower(pos_inf);
- vfloat4 upper(neg_inf);
- size_t items = current.size();
- size_t start = current.begin();
- assert(items <= 1);
-
- /* allocate leaf node */
- InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
- NodeRef ref = BVH::encodeLeaf((char*)accel,items);
- const Instance* instance = this->mesh;
-
- BBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- {
- const unsigned int primID = morton[start+i].index;
- bounds.extend(instance->bounds(primID));
- new (&accel[i]) InstancePrimitive(instance, geomID_);
- }
-
- BBox3fx box_o = (BBox3fx&)bounds;
-#if ROTATE_TREE
- if (N == 4)
- box_o.lower.a = current.size();
-#endif
- return NodeRecord(ref,box_o);
- }
- private:
- Instance* mesh;
- BVHBuilderMorton::BuildPrim* morton;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- };
-
- template<typename Mesh>
- struct CalculateMeshBounds
- {
- __forceinline CalculateMeshBounds (Mesh* mesh)
- : mesh(mesh) {}
-
- __forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
- return mesh->bounds(morton.index);
- }
-
- private:
- Mesh* mesh;
- };
-
- template<int N, typename Mesh, typename Primitive>
- class BVHNMeshBuilderMorton : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecord NodeRecord;
-
- public:
-
- BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
- : bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}
-
- /* build function */
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- morton.clear();
- }
- size_t numPrimitives = mesh->size();
- numPreviousPrimitives = numPrimitives;
-
- /* skip build for empty scene */
- if (numPrimitives == 0) {
- bvh->set(BVH::emptyNode,empty,0);
- return;
- }
-
- /* preallocate arrays */
- morton.resize(numPrimitives);
- size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
- bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
- bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);
-
- /* create morton code array */
- BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
- size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);
-
- /* create BVH */
- SetBVHNBounds<N> setBounds(bvh);
- CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
- CalculateMeshBounds<Mesh> calculateBounds(mesh);
- auto root = BVHBuilderMorton::build<NodeRecord>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create(),
- setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
- morton.data(),dest,numPrimitivesGen,settings);
-
- bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);
-
-#if ROTATE_TREE
- if (N == 4)
- {
- for (int i=0; i<ROTATE_TREE; i++)
- BVHNRotate<N>::rotate(bvh->root);
- bvh->clearBarrier(bvh->root);
- }
-#endif
-
- /* clear temporary data for static geometry */
- if (bvh->scene->isStaticAccel()) {
- morton.clear();
- }
- bvh->cleanup();
- }
-
- void clear() {
- morton.clear();
- }
-
- private:
- BVH* bvh;
- Mesh* mesh;
- mvector<BVHBuilderMorton::BuildPrim> morton;
- BVHBuilderMorton::Settings settings;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
- };
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
- Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
- Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
- Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
- Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
-#if defined(__AVX__)
- Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
-#if defined(__AVX__)
- Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
-#if defined(__AVX__)
- Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
-#endif
-#endif
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp
deleted file mode 100644
index cf5b2eb47f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp
+++ /dev/null
@@ -1,640 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-#include "../builders/primrefgen.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-#define PROFILE 0
-#define PROFILE_RUNS 20
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Primitive>
- struct CreateLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t n = set.size();
- size_t items = Primitive::blocks(n);
- size_t start = set.begin();
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return node;
- }
-
- BVH* bvh;
- };
-
-
- template<int N, typename Primitive>
- struct CreateLeafQuantized
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t n = set.size();
- size_t items = Primitive::blocks(n);
- size_t start = set.begin();
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return node;
- }
-
- BVH* bvh;
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N, typename Primitive>
- struct BVHNBuilderSAH : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- Geometry* mesh;
- mvector<PrimRef> prims;
- GeneralBVHBuilder::Settings settings;
- Geometry::GTypeMask gtype_;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
- bool primrefarrayalloc;
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
- const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
- settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}
-
- BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}
-
- // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* if we use the primrefarray for allocations we have to take it back from the BVH */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.unshare(prims);
-
- /* skip build for empty scene */
- const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
- numPreviousPrimitives = numPrimitives;
- if (numPrimitives == 0) {
- bvh->clear();
- prims.clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
-
- /* create primref array */
- if (primrefarrayalloc) {
- settings.primrefarrayalloc = numPrimitives/1000;
- if (settings.primrefarrayalloc < 1000)
- settings.primrefarrayalloc = inf;
- }
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* initialize allocator */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
- prims.resize(numPrimitives);
-
- PrimInfo pinfo = mesh ?
- createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) :
- createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface);
-
- /* pinfo might has zero size due to invalid geometry */
- if (unlikely(pinfo.size() == 0))
- {
- bvh->clear();
- prims.clear();
- return;
- }
-
- /* call BVH builder */
- NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
-#if PROFILE
- });
-#endif
-
- /* if we allocated using the primrefarray we have to keep it alive */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.share(prims);
-
- /* for static geometries we can do some cleanups */
- else if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- template<int N, typename Primitive>
- struct BVHNBuilderSAHQuantized : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- Geometry* mesh;
- mvector<PrimRef> prims;
- GeneralBVHBuilder::Settings settings;
- Geometry::GTypeMask gtype_;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}
-
- BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}
-
- // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* skip build for empty scene */
- const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
- numPreviousPrimitives = numPrimitives;
- if (numPrimitives == 0) {
- prims.clear();
- bvh->clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
- /* create primref array */
- prims.resize(numPrimitives);
- PrimInfo pinfo = mesh ?
- createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) :
- createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface);
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* call BVH builder */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
- NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- //bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
-#if PROFILE
- });
-#endif
-
- /* clear temporary data for static geometry */
- if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-
- template<int N, typename Primitive>
- struct CreateLeafGrid
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- const size_t items = set.size(); //Primitive::blocks(n);
- const size_t start = set.begin();
-
- /* collect all subsets with unique geomIDs */
- assert(items <= N);
- unsigned int geomIDs[N];
- unsigned int num_geomIDs = 1;
- geomIDs[0] = prims[start].geomID();
-
- for (size_t i=1;i<items;i++)
- {
- bool found = false;
- const unsigned int new_geomID = prims[start+i].geomID();
- for (size_t j=0;j<num_geomIDs;j++)
- if (new_geomID == geomIDs[j])
- { found = true; break; }
- if (!found)
- geomIDs[num_geomIDs++] = new_geomID;
- }
-
- /* allocate all leaf memory in one single block */
- SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,num_geomIDs);
-
- for (size_t g=0;g<num_geomIDs;g++)
- {
- unsigned int x[N];
- unsigned int y[N];
- unsigned int primID[N];
- BBox3fa bounds[N];
- unsigned int pos = 0;
- for (size_t i=0;i<items;i++)
- {
- if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
-
- const SubGridBuildData& sgrid_bd = sgrids[prims[start+i].primID()];
- x[pos] = sgrid_bd.sx;
- y[pos] = sgrid_bd.sy;
- primID[pos] = sgrid_bd.primID;
- bounds[pos] = prims[start+i].bounds();
- pos++;
- }
- assert(pos <= N);
- new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,geomIDs[g],pos);
- }
-
- return node;
- }
-
- BVH* bvh;
- const SubGridBuildData * const sgrids;
- };
-
-
- template<int N>
- struct BVHNBuilderSAHGrid : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- BVH* bvh;
- Scene* scene;
- GridMesh* mesh;
- mvector<PrimRef> prims;
- mvector<SubGridBuildData> sgrids;
- GeneralBVHBuilder::Settings settings;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}
-
- BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* if we use the primrefarray for allocations we have to take it back from the BVH */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.unshare(prims);
-
- const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
- numPreviousPrimitives = numGridPrimitives;
-
- PrimInfo pinfo(empty);
- size_t numPrimitives = 0;
-
- if (!mesh)
- {
- /* first run to get #primitives */
-
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator<GridMesh,false> iter(scene);
-
- pstate.init(iter,size_t(1024));
-
- /* iterate over all meshes in the scene */
- pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- BBox3fa bounds = empty;
- const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j);
- if (!mesh->valid(j)) continue;
- pinfo.add_center2(prim,mesh->getNumSubGrids(j));
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- numPrimitives = pinfo.size();
-
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
-
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- k = base.size();
- size_t p_index = k;
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- const GridMesh::Grid &g = mesh->grid(j);
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
- const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index);
- pinfo.add_center2(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- assert(pinfo.size() == numPrimitives);
- }
- else
- {
- ParallelPrefixSumState<PrimInfo> pstate;
- /* iterate over all grids in a single mesh */
- pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- BBox3fa bounds = empty;
- const PrimRef prim(bounds,geomID_,unsigned(j));
- pinfo.add_center2(prim,mesh->getNumSubGrids(j));
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- numPrimitives = pinfo.size();
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
-
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
- {
-
- size_t p_index = base.size();
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j)) continue;
- const GridMesh::Grid &g = mesh->grid(j);
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
- const PrimRef prim(bounds,geomID_,unsigned(p_index));
- pinfo.add_center2(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- }
-
- /* no primitives */
- if (numPrimitives == 0) {
- bvh->clear();
- prims.clear();
- sgrids.clear();
- return;
- }
-
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
-
- /* create primref array */
- settings.primrefarrayalloc = numPrimitives/1000;
- if (settings.primrefarrayalloc < 1000)
- settings.primrefarrayalloc = inf;
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- /* initialize allocator */
- const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
-
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
-
- /* pinfo might has zero size due to invalid geometry */
- if (unlikely(pinfo.size() == 0))
- {
- bvh->clear();
- sgrids.clear();
- prims.clear();
- return;
- }
-
- /* call BVH builder */
- NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
- /* clear temporary array */
- sgrids.clear();
-
- /* if we allocated using the primrefarray we have to keep it alive */
- if (settings.primrefarrayalloc != size_t(inf))
- bvh->alloc.share(prims);
-
- /* for static geometries we can do some cleanups */
- else if (scene && scene->isStaticAccel()) {
- prims.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
- Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
-
-
- Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
- Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
- Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
- Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
-
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
- Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
-
-#if defined(__AVX__)
- Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
- Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
- Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
-
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
-
- Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_max_leaf_size;
- return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
- }
-
- Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
- }
-#if defined(__AVX__)
-
- Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_max_leaf_size;
- return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
- }
-
- Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
- }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
- Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
- }
-#if defined(__AVX__)
- Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
- Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
- return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,gtype);
- }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_GRID)
- Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); }
- Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct
-
-#if defined(__AVX__)
- Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); }
- Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct
-#endif
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp
deleted file mode 100644
index 9c01553ec6..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp
+++ /dev/null
@@ -1,705 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-#include "../builders/bvh_builder_msmblur.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-
-// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-
-namespace embree
-{
- namespace isa
- {
-
-#if 0
- template<int N, typename Primitive>
- struct CreateMBlurLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
-
- __forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}
-
- __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t items = Primitive::blocks(set.size());
- size_t start = set.begin();
- for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- NodeRef node = bvh->encodeLeaf((char*)accel,items);
-
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<items; i++)
- allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));
-
- return NodeRecordMB(node,allBounds);
- }
-
- BVH* bvh;
- PrimRef* prims;
- size_t time;
- };
-#endif
-
- template<int N, typename Mesh, typename Primitive>
- struct CreateMSMBlurLeaf
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
-
- __forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}
-
- __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t items = Primitive::blocks(current.prims.size());
- size_t start = current.prims.begin();
- size_t end = current.prims.end();
- for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteNodeAlignment);
- NodeRef node = bvh->encodeLeaf((char*)accel,items);
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<items; i++)
- allBounds.extend(accel[i].fillMB(current.prims.prims->data(), start, current.prims.end(), bvh->scene, current.prims.time_range));
- return NodeRecordMB4D(node,allBounds,current.prims.time_range);
- }
-
- BVH* bvh;
- };
-
- /* Motion blur BVH with 4D nodes and internal time splits */
- template<int N, typename Mesh, typename Primitive>
- struct BVHNBuilderMBlurSAH : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
- typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
- typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
-
- BVH* bvh;
- Scene* scene;
- const size_t sahBlockSize;
- const float intCost;
- const size_t minLeafSize;
- const size_t maxLeafSize;
- const Geometry::GTypeMask gtype_;
-
- BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
- : bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}
-
- void build()
- {
- /* skip build for empty scene */
- const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
- if (numPrimitives == 0) { bvh->clear(); return; }
-
- double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");
-
-#if PROFILE
- profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
-#endif
-
- //const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
- //const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
-
- /*if (numTimeSegments == 1)
- buildSingleSegment(numPrimitives);
- else*/
- buildMultiSegment(numPrimitives);
-
-#if PROFILE
- });
-#endif
-
- /* clear temporary data for static geometry */
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
-#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf.
- void buildSingleSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRef> prims(scene->device,numPrimitives);
- const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,prims,bvh->scene->progressInterface,0);
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
- (typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
- CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
- prims.data(),pinfo,settings);
-
- bvh->set(root.ref,root.lbounds,pinfo.size());
- }
-#endif
-
- void buildMultiSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRefMB> prims(scene->device,numPrimitives);
- PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,prims,bvh->scene->progressInterface);
-
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
-
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- BVHBuilderMSMBlur::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxDepth;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root =
- BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
- RecalculatePrimRef<Mesh>(scene),
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNodeMB4D::Create(),
- typename BVH::AABBNodeMB4D::Set(),
- CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
- bvh->scene->progressInterface,
- settings);
-
- bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
- }
-
- void clear() {
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- struct GridRecalculatePrimRef
- {
- Scene* scene;
- const SubGridBuildData * const sgrids;
-
- __forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
- : scene(scene), sgrids(sgrids) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned int geomID = prim.geomID();
- const GridMesh* mesh = scene->get<GridMesh>(geomID);
- const unsigned int buildID = prim.primID();
- const SubGridBuildData &subgrid = sgrids[buildID];
- const unsigned int primID = subgrid.primID;
- const size_t x = subgrid.x();
- const size_t y = subgrid.y();
- const LBBox3fa lbounds = mesh->linearBounds(mesh->grid(primID),x,y,time_range);
- const unsigned num_time_segments = mesh->numTimeSegments();
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- const unsigned int geomID = prim.geomID();
- const GridMesh* mesh = scene->get<GridMesh>(geomID);
- const unsigned int buildID = prim.primID();
- const SubGridBuildData &subgrid = sgrids[buildID];
- const unsigned int primID = subgrid.primID;
- const size_t x = subgrid.x();
- const size_t y = subgrid.y();
- return mesh->linearBounds(mesh->grid(primID),x,y,time_range);
- }
-
- };
-
- template<int N>
- struct CreateMSMBlurLeafGrid
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
-
- __forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) : scene(scene), bvh(bvh), sgrids(sgrids) {}
-
- __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
- {
- const size_t items = current.prims.size();
- const size_t start = current.prims.begin();
-
- const PrimRefMB* prims = current.prims.prims->data();
- /* collect all subsets with unique geomIDs */
- assert(items <= N);
- unsigned int geomIDs[N];
- unsigned int num_geomIDs = 1;
- geomIDs[0] = prims[start].geomID();
-
- for (size_t i=1;i<items;i++)
- {
- bool found = false;
- const unsigned int new_geomID = prims[start+i].geomID();
- for (size_t j=0;j<num_geomIDs;j++)
- if (new_geomID == geomIDs[j])
- { found = true; break; }
- if (!found)
- geomIDs[num_geomIDs++] = new_geomID;
- }
-
- /* allocate all leaf memory in one single block */
- SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
- typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
-
- LBBox3fa allBounds = empty;
-
- for (size_t g=0;g<num_geomIDs;g++)
- {
- const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
- unsigned int x[N];
- unsigned int y[N];
- unsigned int primID[N];
- BBox3fa bounds0[N];
- BBox3fa bounds1[N];
- unsigned int pos = 0;
- for (size_t i=0;i<items;i++)
- {
- if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
-
- const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
- x[pos] = sgrid_bd.sx;
- y[pos] = sgrid_bd.sy;
- primID[pos] = sgrid_bd.primID;
- const size_t x = sgrid_bd.x();
- const size_t y = sgrid_bd.y();
- LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),x,y,current.prims.time_range);
- allBounds.extend(newBounds);
- bounds0[pos] = newBounds.bounds0;
- bounds1[pos] = newBounds.bounds1;
- pos++;
- }
- assert(pos <= N);
- new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos);
- }
- return NodeRecordMB4D(node,allBounds,current.prims.time_range);
- }
-
- Scene *scene;
- BVH* bvh;
- const SubGridBuildData * const sgrids;
- };
-
-#if 0
- template<int N>
- struct CreateLeafGridMB
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::NodeRecordMB NodeRecordMB;
-
- __forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
- : scene(scene), bvh(bvh), sgrids(sgrids) {}
-
- __forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- const size_t items = set.size();
- const size_t start = set.begin();
-
- /* collect all subsets with unique geomIDs */
- assert(items <= N);
- unsigned int geomIDs[N];
- unsigned int num_geomIDs = 1;
- geomIDs[0] = prims[start].geomID();
-
- for (size_t i=1;i<items;i++)
- {
- bool found = false;
- const unsigned int new_geomID = prims[start+i].geomID();
- for (size_t j=0;j<num_geomIDs;j++)
- if (new_geomID == geomIDs[j])
- { found = true; break; }
- if (!found)
- geomIDs[num_geomIDs++] = new_geomID;
- }
-
- /* allocate all leaf memory in one single block */
- SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
- typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
-
- LBBox3fa allBounds = empty;
-
- for (size_t g=0;g<num_geomIDs;g++)
- {
- const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
-
- unsigned int x[N];
- unsigned int y[N];
- unsigned int primID[N];
- BBox3fa bounds0[N];
- BBox3fa bounds1[N];
- unsigned int pos = 0;
- for (size_t i=0;i<items;i++)
- {
- if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
-
- const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
- x[pos] = sgrid_bd.sx;
- y[pos] = sgrid_bd.sy;
- primID[pos] = sgrid_bd.primID;
- const size_t x = sgrid_bd.x();
- const size_t y = sgrid_bd.y();
- bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
- bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
- assert(valid0);
- assert(valid1);
- allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
- pos++;
- }
- new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
- }
- return NodeRecordMB(node,allBounds);
- }
-
- Scene *scene;
- BVH* bvh;
- const SubGridBuildData * const sgrids;
- };
-#endif
-
-
- /* Motion blur BVH with 4D nodes and internal time splits */
- template<int N>
- struct BVHNBuilderMBlurSAHGrid : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVHN<N>::NodeRef NodeRef;
- typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
- typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
-
- BVH* bvh;
- Scene* scene;
- const size_t sahBlockSize;
- const float intCost;
- const size_t minLeafSize;
- const size_t maxLeafSize;
- mvector<SubGridBuildData> sgrids;
-
-
- BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
- : bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {}
-
-
- PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
- {
- /* first run to get #primitives */
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator<GridMesh,true> iter(scene);
-
- pstate.init(iter,size_t(1024));
-
- /* iterate over all meshes in the scene */
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
-
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j,range<size_t>(0,1))) continue;
- BBox3fa bounds = empty;
- const PrimRef prim(bounds,unsigned(geomID),unsigned(j));
- pinfo.add_center2(prim,mesh->getNumSubGrids(j));
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- size_t numPrimitives = pinfo.size();
- if (numPrimitives == 0) return pinfo;
-
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
-
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
-
- k = base.size();
- size_t p_index = k;
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- const GridMesh::Grid &g = mesh->grid(j);
- if (!mesh->valid(j,range<size_t>(0,1))) continue;
-
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid
- const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index));
- pinfo.add_center2(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- assert(pinfo.size() == numPrimitives);
- return pinfo;
- }
-
- PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
- {
- /* first run to get #primitives */
- ParallelForForPrefixSumState<PrimInfoMB> pstate;
- Scene::Iterator<GridMesh,true> iter(scene);
-
- pstate.init(iter,size_t(1024));
- /* iterate over all meshes in the scene */
- PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
-
- PrimInfoMB pinfoMB(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
- LBBox3fa bounds(empty);
- PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
- pinfoMB.merge(gridMB);
- }
- return pinfoMB;
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
- size_t numPrimitives = pinfoMB.size();
- if (numPrimitives == 0) return pinfoMB;
-
- /* resize arrays */
- sgrids.resize(numPrimitives);
- prims.resize(numPrimitives);
- /* second run to fill primrefs and SubGridBuildData arrays */
- pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
-
- k = base.size();
- size_t p_index = k;
- PrimInfoMB pinfoMB(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
- const GridMesh::Grid &g = mesh->grid(j);
-
- for (unsigned int y=0; y<g.resY-1u; y+=2)
- for (unsigned int x=0; x<g.resX-1u; x+=2)
- {
- const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
- pinfoMB.add_primref(prim);
- sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
- prims[p_index++] = prim;
- }
- }
- return pinfoMB;
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
- assert(pinfoMB.size() == numPrimitives);
- pinfoMB.time_range = t0t1;
- return pinfoMB;
- }
-
- void build()
- {
- /* skip build for empty scene */
- const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
- if (numPrimitives == 0) { bvh->clear(); return; }
-
- double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");
-
- //const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>();
- //const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
- //if (numTimeSegments == 1)
- // buildSingleSegment(numPrimitives);
- //else
- buildMultiSegment(numPrimitives);
-
- /* clear temporary data for static geometry */
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
-#if 0
- void buildSingleSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRef> prims(scene->device,numPrimitives);
- const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
-
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
- //TODO: check leaf_bytes
- const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
- (typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNodeMB::Create(),
- typename BVH::AABBNodeMB::Set(),
- CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
- bvh->scene->progressInterface,
- prims.data(),pinfo,settings);
-
- bvh->set(root.ref,root.lbounds,pinfo.size());
- }
-#endif
-
- void buildMultiSegment(size_t numPrimitives)
- {
- /* create primref array */
- mvector<PrimRefMB> prims(scene->device,numPrimitives);
- PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);
-
- /* early out if no valid primitives */
- if (pinfo.size() == 0) { bvh->clear(); return; }
-
-
-
- GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());
-
- /* estimate acceleration structure size */
- const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
- //FIXME: check leaf_bytes
- //const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>));
- const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
-
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- /* settings for BVH build */
- BVHBuilderMSMBlur::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxDepth;
- settings.logBlockSize = bsr(sahBlockSize);
- settings.minLeafSize = min(minLeafSize,maxLeafSize);
- settings.maxLeafSize = maxLeafSize;
- settings.travCost = travCost;
- settings.intCost = intCost;
- settings.singleLeafTimeSegment = false;
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- /* build hierarchy */
- auto root =
- BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
- recalculatePrimRef,
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNodeMB4D::Create(),
- typename BVH::AABBNodeMB4D::Set(),
- CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
- bvh->scene->progressInterface,
- settings);
- bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
- }
-
- void clear() {
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
- Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
- Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
-#if defined(__AVX__)
- Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
- return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
- }
-#if defined(__AVX__)
- Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
- int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
- int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
- return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
- }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
-#if defined(__AVX__)
- Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_GRID)
- Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); }
-#if defined(__AVX__)
- Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8); }
-#endif
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp
deleted file mode 100644
index 285b38c39d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh.h"
-#include "bvh_builder.h"
-
-#include "../builders/primrefgen.h"
-#include "../builders/primrefgen_presplit.h"
-#include "../builders/splitter.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglev_mb.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-#include "../geometry/subgrid.h"
-
-#include "../common/state.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Primitive>
- struct CreateLeafSpatial
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}
-
- __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
- {
- size_t n = set.size();
- size_t items = Primitive::blocks(n);
- size_t start = set.begin();
- Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return node;
- }
-
- BVH* bvh;
- };
-
- template<int N, typename Mesh, typename Primitive, typename Splitter>
- struct BVHNBuilderFastSpatialSAH : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- BVH* bvh;
- Scene* scene;
- Mesh* mesh;
- mvector<PrimRef> prims0;
- GeneralBVHBuilder::Settings settings;
- const float splitFactor;
- unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
- unsigned int numPreviousPrimitives = 0;
-
- BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
- splitFactor(scene->device->max_spatial_split_replications) {}
-
- BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
- : bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
- splitFactor(scene->device->max_spatial_split_replications), geomID_(geomID) {}
-
- // FIXME: shrink bvh->alloc in destructor here and in other builders too
-
- void build()
- {
- /* we reset the allocator when the mesh size changed */
- if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
- bvh->alloc.clear();
- }
-
- /* skip build for empty scene */
- const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
- numPreviousPrimitives = numOriginalPrimitives;
- if (numOriginalPrimitives == 0) {
- prims0.clear();
- bvh->clear();
- return;
- }
-
- const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
- const bool usePreSplits = scene->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
- double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));
-
- /* create primref array */
- const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
- prims0.resize(numSplitPrimitives);
-
- /* enable os_malloc for two level build */
- if (mesh)
- bvh->alloc.setOSallocation(true);
-
- NodeRef root(0);
- PrimInfo pinfo;
-
-
- if (likely(usePreSplits))
- {
- /* spatial presplit SAH BVH builder */
- pinfo = mesh ?
- createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
- createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);
-
- const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
-
- /* call BVH builder */
- root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
- }
- else
- {
- /* standard spatial split SAH BVH builder */
- pinfo = mesh ?
- createPrimRefArray(mesh,geomID_,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface) :
- createPrimRefArray(scene,Mesh::geom_type,false,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface);
-
- Splitter splitter(scene);
-
- const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
- const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
- settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
-
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
-
- /* call BVH builder */
- root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create2(),
- typename BVH::AABBNode::Set2(),
- CreateLeafSpatial<N,Primitive>(bvh),
- splitter,
- bvh->scene->progressInterface,
- prims0.data(),
- numSplitPrimitives,
- pinfo,settings);
-
- /* ==================== */
- }
-
- bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
- bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
-
- /* clear temporary data for static geometry */
- if (scene && scene->isStaticAccel()) {
- prims0.clear();
- }
- bvh->cleanup();
- bvh->postBuild(t0);
- }
-
- void clear() {
- prims0.clear();
- }
- };
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
-
- Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
- Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
- Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
- Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
-#endif
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
-#endif
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp
deleted file mode 100644
index 1a78f347ac..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_builder_twolevel.h"
-#include "bvh_statistics.h"
-#include "../builders/bvh_builder_sah.h"
-#include "../common/scene_line_segments.h"
-#include "../common/scene_triangle_mesh.h"
-#include "../common/scene_quad_mesh.h"
-
-#define PROFILE 0
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Mesh, typename Primitive>
- BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
- : bvh(bvh), scene(scene), refs(scene->device,0), prims(scene->device,0), singleThreadThreshold(singleThreadThreshold), gtype(gtype), useMortonBuilder_(useMortonBuilder) {}
-
- template<int N, typename Mesh, typename Primitive>
- BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () {
- }
-
- // ===========================================================================
- // ===========================================================================
- // ===========================================================================
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
- {
- /* delete some objects */
- size_t num = scene->size();
- if (num < bvh->objects.size()) {
- parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- builders[i].reset();
- delete bvh->objects[i]; bvh->objects[i] = nullptr;
- }
- });
- }
-
-#if PROFILE
- while(1)
-#endif
- {
- /* reset memory allocator */
- bvh->alloc.reset();
-
- /* skip build for empty scene */
- const size_t numPrimitives = scene->getNumPrimitives(gtype,false);
-
- if (numPrimitives == 0) {
- prims.resize(0);
- bvh->set(BVH::emptyNode,empty,0);
- return;
- }
-
- /* calculate the size of the entire BVH */
- const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
- const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
- const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
- bvh->alloc.init_estimate(node_bytes+leaf_bytes);
-
- double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");
-
- /* resize object array if scene got larger */
- if (bvh->objects.size() < num) bvh->objects.resize(num);
- if (builders.size() < num) builders.resize(num);
- resizeRefsList ();
- nextRef.store(0);
-
- /* create acceleration structures */
- parallel_for(size_t(0), num, [&] (const range<size_t>& r)
- {
- for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
- {
- Mesh* mesh = scene->getSafe<Mesh>(objectID);
-
- /* ignore meshes we do not support */
- if (mesh == nullptr || mesh->numTimeSteps != 1)
- continue;
-
- if (isSmallGeometry(mesh)) {
- setupSmallBuildRefBuilder (objectID, mesh);
- } else {
- setupLargeBuildRefBuilder (objectID, mesh);
- }
- }
- });
-
- /* parallel build of acceleration structures */
- parallel_for(size_t(0), num, [&] (const range<size_t>& r)
- {
- for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
- {
- /* ignore if no triangle mesh or not enabled */
- Mesh* mesh = scene->getSafe<Mesh>(objectID);
- if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
- continue;
-
- builders[objectID]->attachBuildRefs (this);
- }
- });
-
-
-#if PROFILE
- double d0 = getSeconds();
-#endif
- /* fast path for single geometry scenes */
- if (nextRef == 1) {
- bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
- }
-
- else
- {
- /* open all large nodes */
- refs.resize(nextRef);
-
- /* this probably needs some more tuning */
- const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
-
-#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
-
-#if ENABLE_OPEN_SEQUENTIAL
- open_sequential(extSize);
-#endif
- /* compute PrimRefs */
- prims.resize(refs.size());
-#endif
-
-#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL
- tbb::task_arena limited(min(32,(int)TaskScheduler::threadCount()));
- limited.execute([&]
-#endif
- {
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
-
- const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
-
- PrimInfo pinfo(empty);
- for (size_t i=r.begin(); i<r.end(); i++) {
- pinfo.add_center2(refs[i]);
- }
- return pinfo;
- }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
-
-#else
- const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
-
- PrimInfo pinfo(empty);
- for (size_t i=r.begin(); i<r.end(); i++) {
- pinfo.add_center2(refs[i]);
- prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
- }
- return pinfo;
- }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
-#endif
-
- /* skip if all objects where empty */
- if (pinfo.size() == 0)
- bvh->set(BVH::emptyNode,empty,0);
-
- /* otherwise build toplevel hierarchy */
- else
- {
- /* settings for BVH build */
- GeneralBVHBuilder::Settings settings;
- settings.branchingFactor = N;
- settings.maxDepth = BVH::maxBuildDepthLeaf;
- settings.logBlockSize = bsr(N);
- settings.minLeafSize = 1;
- settings.maxLeafSize = 1;
- settings.travCost = 1.0f;
- settings.intCost = 1.0f;
- settings.singleThreadThreshold = singleThreadThreshold;
-
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
-
- refs.resize(extSize);
-
- NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create2(),
- typename BVH::AABBNode::Set2(),
-
- [&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
- assert(range.size() == 1);
- return (NodeRef) refs[range.begin()].node;
- },
- [&] (BuildRef &bref, BuildRef *refs) -> size_t {
- return openBuildRef(bref,refs);
- },
- [&] (size_t dn) { bvh->scene->progressMonitor(0); },
- refs.data(),extSize,pinfo,settings);
-#else
- NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
- typename BVH::CreateAlloc(bvh),
- typename BVH::AABBNode::Create2(),
- typename BVH::AABBNode::Set2(),
-
- [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
- assert(range.size() == 1);
- return (NodeRef) prims[range.begin()].ID();
- },
- [&] (size_t dn) { bvh->scene->progressMonitor(0); },
- prims.data(),pinfo,settings);
-#endif
-
-
- bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
- }
- }
-#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL
- );
-#endif
-
- }
-
- bvh->alloc.cleanup();
- bvh->postBuild(t0);
-#if PROFILE
- double d1 = getSeconds();
- std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
-#endif
- }
-
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
- {
- if (geomID >= bvh->objects.size()) return;
- if (builders[geomID]) builders[geomID].reset();
- delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr;
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
- {
- for (size_t i=0; i<bvh->objects.size(); i++)
- if (bvh->objects[i]) bvh->objects[i]->clear();
-
- for (size_t i=0; i<builders.size(); i++)
- if (builders[i]) builders[i].reset();
-
- refs.clear();
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
- {
- if (refs.size() == 0)
- return;
-
- refs.reserve(extSize);
-
-#if 1
- for (size_t i=0;i<refs.size();i++)
- {
- NodeRef ref = refs[i].node;
- if (ref.isAABBNode())
- BVH::prefetch(ref);
- }
-#endif
-
- std::make_heap(refs.begin(),refs.end());
- while (refs.size()+N-1 <= extSize)
- {
- std::pop_heap (refs.begin(),refs.end());
- NodeRef ref = refs.back().node;
- if (ref.isLeaf()) break;
- refs.pop_back();
-
- AABBNode* node = ref.getAABBNode();
- for (size_t i=0; i<N; i++) {
- if (node->child(i) == BVH::emptyNode) continue;
- refs.push_back(BuildRef(node->bounds(i),node->child(i)));
-
-#if 1
- NodeRef ref_pre = node->child(i);
- if (ref_pre.isAABBNode())
- ref_pre.prefetch();
-#endif
- std::push_heap (refs.begin(),refs.end());
- }
- }
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
- {
- if (builders[objectID] == nullptr || // new mesh
- dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change
- {
- builders[objectID].reset (new RefBuilderSmall(objectID));
- }
- }
-
- template<int N, typename Mesh, typename Primitive>
- void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
- {
- if (bvh->objects[objectID] == nullptr || // new mesh
- builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality
- dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change
- {
- Builder* builder = nullptr;
- delete bvh->objects[objectID];
- createMeshAccel(objectID, builder);
- builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
- }
- }
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
- }
-#endif
-
-#if defined(__AVX__)
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
- Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
- }
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
- return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
- }
-#endif
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h
deleted file mode 100644
index 8f57c3b406..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include <type_traits>
-
-#include "bvh_builder_twolevel_internal.h"
-#include "bvh.h"
-#include "../common/primref.h"
-#include "../builders/priminfo.h"
-#include "../builders/primrefgen.h"
-
-/* new open/merge builder */
-#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
-#define ENABLE_OPEN_SEQUENTIAL 0
-#define SPLIT_MEMORY_RESERVE_FACTOR 1000
-#define SPLIT_MEMORY_RESERVE_SCALE 2
-#define SPLIT_MIN_EXT_SPACE 1000
-
-namespace embree
-{
- namespace isa
- {
- template<int N, typename Mesh, typename Primitive>
- class BVHNBuilderTwoLevel : public Builder
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
-
- __forceinline static bool isSmallGeometry(Mesh* mesh) {
- return mesh->size() <= 4;
- }
-
- public:
-
- typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
-
- struct BuildRef : public PrimRef
- {
- public:
- __forceinline BuildRef () {}
-
- __forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
- : PrimRef(bounds,(size_t)node), node(node)
- {
- if (node.isLeaf())
- bounds_area = 0.0f;
- else
- bounds_area = area(this->bounds());
- }
-
- /* used by the open/merge bvh builder */
- __forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
- : PrimRef(bounds,geomID,numPrimitives), node(node)
- {
- /* important for relative buildref ordering */
- if (node.isLeaf())
- bounds_area = 0.0f;
- else
- bounds_area = area(this->bounds());
- }
-
- __forceinline size_t size() const {
- return primID();
- }
-
- friend bool operator< (const BuildRef& a, const BuildRef& b) {
- return a.bounds_area < b.bounds_area;
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
- return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
- }
-
- __forceinline unsigned int numPrimitives() const { return primID(); }
-
- public:
- NodeRef node;
- float bounds_area;
- };
-
-
- __forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
- if (bref.node.isLeaf())
- {
- refs[0] = bref;
- return 1;
- }
- NodeRef ref = bref.node;
- unsigned int geomID = bref.geomID();
- unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
- AABBNode* node = ref.getAABBNode();
- size_t n = 0;
- for (size_t i=0; i<N; i++) {
- if (node->child(i) == BVH::emptyNode) continue;
- refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
- n++;
- }
- assert(n > 1);
- return n;
- }
-
- /*! Constructor. */
- BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);
-
- /*! Destructor */
- ~BVHNBuilderTwoLevel ();
-
- /*! builder entry point */
- void build();
- void deleteGeometry(size_t geomID);
- void clear();
-
- void open_sequential(const size_t extSize);
-
- private:
-
- class RefBuilderBase {
- public:
- virtual ~RefBuilderBase () {}
- virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
- virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
- };
-
- class RefBuilderSmall : public RefBuilderBase {
- public:
-
- RefBuilderSmall (size_t objectID)
- : objectID_ (objectID) {}
-
- void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) {
-
- Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
- size_t meshSize = mesh->size();
- assert(isSmallGeometry(mesh));
-
- mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
- auto pinfo = createPrimRefArray(mesh,objectID_,prefs,topBuilder->bvh->scene->progressInterface);
-
- size_t begin=0;
- while (begin < pinfo.size())
- {
- Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
- typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
- accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);
-
- /* create build primitive */
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
-#else
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
-#endif
- }
- assert(begin == pinfo.size());
- }
-
- bool meshQualityChanged (RTCBuildQuality /*currQuality*/) {
- return false;
- }
-
- size_t objectID_;
- };
-
- class RefBuilderLarge : public RefBuilderBase {
- public:
-
- RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
- : objectID_ (objectID), builder_ (builder), quality_ (quality) {}
-
- void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder)
- {
- BVH* object = topBuilder->getBVH(objectID_); assert(object);
-
- /* build object if it got modified */
- if (topBuilder->isGeometryModified(objectID_))
- builder_->build();
-
- /* create build primitive */
- if (!object->getBounds().empty())
- {
-#if ENABLE_DIRECT_SAH_MERGE_BUILDER
- Mesh* mesh = topBuilder->getMesh(objectID_);
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
-#else
- topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
-#endif
- }
- }
-
- bool meshQualityChanged (RTCBuildQuality currQuality) {
- return currQuality != quality_;
- }
-
- private:
- size_t objectID_;
- Ref<Builder> builder_;
- RTCBuildQuality quality_;
- };
-
- void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
- void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);
-
- BVH* getBVH (size_t objectID) {
- return this->bvh->objects[objectID];
- }
- Mesh* getMesh (size_t objectID) {
- return this->scene->template getSafe<Mesh>(objectID);
- }
- bool isGeometryModified (size_t objectID) {
- return this->scene->isGeometryModified(objectID);
- }
-
- void resizeRefsList ()
- {
- size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
- [this](const range<size_t>& r)->size_t {
- size_t c = 0;
- for (auto i=r.begin(); i<r.end(); ++i) {
- Mesh* mesh = scene->getSafe<Mesh>(i);
- if (mesh == nullptr || mesh->numTimeSteps != 1)
- continue;
- size_t meshSize = mesh->size();
- c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
- }
- return c;
- },
- std::plus<size_t>()
- );
-
- if (refs.size() < num) {
- refs.resize(num);
- }
- }
-
- void createMeshAccel (size_t geomID, Builder*& builder)
- {
- bvh->objects[geomID] = new BVH(Primitive::type,scene);
- BVH* accel = bvh->objects[geomID];
- auto mesh = scene->getSafe<Mesh>(geomID);
- if (nullptr == mesh) {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
- return;
- }
-
- __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
- }
-
- using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;
-
- BuilderList builders;
- BVH* bvh;
- Scene* scene;
- mvector<BuildRef> refs;
- mvector<PrimRef> prims;
- std::atomic<int> nextRef;
- const size_t singleThreadThreshold;
- Geometry::GTypeMask gtype;
- bool useMortonBuilder_ = false;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h
deleted file mode 100644
index 1c1ae8d6a7..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h
+++ /dev/null
@@ -1,267 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/quadi.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-namespace embree
-{
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
- DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
-
- namespace isa
- {
-
- namespace __internal_two_level_builder__ {
-
- template<int N, typename Mesh, typename Primitive>
- struct MortonBuilder {};
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,TriangleMesh,Triangle4i> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,QuadMesh,Quad4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,UserGeometry,Object> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<4,Instance,InstancePrimitive> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,TriangleMesh,Triangle4i> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,QuadMesh,Quad4v> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,UserGeometry,Object> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
- };
- template<>
- struct MortonBuilder<8,Instance,InstancePrimitive> {
- MortonBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct SAHBuilder {};
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,TriangleMesh,Triangle4i> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,QuadMesh,Quad4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,UserGeometry,Object> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<4,Instance,InstancePrimitive> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,TriangleMesh,Triangle4i> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,QuadMesh,Quad4v> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,UserGeometry,Object> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct SAHBuilder<8,Instance,InstancePrimitive> {
- SAHBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct RefitBuilder {};
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,TriangleMesh,Triangle4i> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,QuadMesh,Quad4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,UserGeometry,Object> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<4,Instance,InstancePrimitive> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,TriangleMesh,Triangle4i> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,QuadMesh,Quad4v> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,UserGeometry,Object> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
- };
- template<>
- struct RefitBuilder<8,Instance,InstancePrimitive> {
- RefitBuilder () {}
- Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
- };
-
- template<int N, typename Mesh, typename Primitive>
- struct MeshBuilder {
- MeshBuilder () {}
- void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) {
- if(useMortonBuilder) {
- builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
- return;
- }
- switch (mesh->quality) {
- case RTC_BUILD_QUALITY_LOW: builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- case RTC_BUILD_QUALITY_MEDIUM:
- case RTC_BUILD_QUALITY_HIGH: builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- case RTC_BUILD_QUALITY_REFIT: builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
- default: throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
- }
- }
- };
- }
- }
-} \ No newline at end of file
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp
deleted file mode 100644
index a27be8bae8..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_collider.h"
-#include "../geometry/triangle_triangle_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
-#define CSTAT(x)
-
- size_t parallel_depth_threshold = 3;
- CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
- CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
- CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
- CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
-
- struct Collision
- {
- __forceinline Collision() {}
-
- __forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
- : geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {}
-
- unsigned geomID0;
- unsigned primID0;
- unsigned geomID1;
- unsigned primID1;
- };
-
- template<int N>
- __forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- template<int N>
- __forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- template<int N>
- __forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
- {
- const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
- const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
- const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
- const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
- const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
- const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
- return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
- }
-
- bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
- {
- CSTAT(bvh_collide_prim_intersections1++);
- const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
- const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);
- const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
- const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);
-
- /* special culling for scene intersection with itself */
- if (scene0 == scene1 && geomID0 == geomID1)
- {
- /* ignore self intersections */
- if (primID0 == primID1)
- return false;
- }
- CSTAT(bvh_collide_prim_intersections2++);
-
- if (scene0 == scene1 && geomID0 == geomID1)
- {
- /* ignore intersection with topological neighbors */
- const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
- if (any(vint4(tri1.v[0]) == t0)) return false;
- if (any(vint4(tri1.v[1]) == t0)) return false;
- if (any(vint4(tri1.v[2]) == t0)) return false;
- }
- CSTAT(bvh_collide_prim_intersections3++);
-
- const Vec3fa a0 = mesh0->vertex(tri0.v[0]);
- const Vec3fa a1 = mesh0->vertex(tri0.v[1]);
- const Vec3fa a2 = mesh0->vertex(tri0.v[2]);
- const Vec3fa b0 = mesh1->vertex(tri1.v[0]);
- const Vec3fa b1 = mesh1->vertex(tri1.v[1]);
- const Vec3fa b2 = mesh1->vertex(tri1.v[2]);
-
- return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2);
- }
-
- template<int N>
- __forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
- {
- Collision collisions[16];
- size_t num_collisions = 0;
-
- size_t N0; Object* leaf0 = (Object*) node0.leaf(N0);
- size_t N1; Object* leaf1 = (Object*) node1.leaf(N1);
- for (size_t i=0; i<N0; i++) {
- for (size_t j=0; j<N1; j++) {
- const unsigned geomID0 = leaf0[i].geomID();
- const unsigned primID0 = leaf0[i].primID();
- const unsigned geomID1 = leaf1[j].geomID();
- const unsigned primID1 = leaf1[j].primID();
- if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue;
- collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1);
- if (num_collisions == 16) {
- this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
- num_collisions = 0;
- }
- }
- }
- if (num_collisions)
- this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
- }
-
- template<int N>
- void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
- {
- CSTAT(bvh_collide_traversal_steps++);
- if (unlikely(ref0.isLeaf())) {
- if (unlikely(ref1.isLeaf())) {
- CSTAT(bvh_collide_leaf_pairs++);
- processLeaf(ref0,ref1);
- return;
- } else goto recurse_node1;
-
- } else {
- if (unlikely(ref1.isLeaf())) {
- goto recurse_node0;
- } else {
- if (area(bounds0) > area(bounds1)) {
- goto recurse_node0;
- }
- else {
- goto recurse_node1;
- }
- }
- }
-
- {
- recurse_node0:
- AABBNode* node0 = ref0.getAABBNode();
- size_t mask = overlap<N>(bounds1,*node0);
- //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- //for (size_t i=0; i<N; i++) {
-#if 0
- if (depth0 < parallel_depth_threshold)
- {
- parallel_for(size_t(N), [&] ( size_t i ) {
- if (mask & ( 1 << i)) {
- BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
- }
- });
- }
- else
-#endif
- {
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
- }
- }
- return;
- }
-
- {
- recurse_node1:
- AABBNode* node1 = ref1.getAABBNode();
- size_t mask = overlap<N>(bounds0,*node1);
- //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- //for (size_t i=0; i<N; i++) {
-#if 0
- if (depth1 < parallel_depth_threshold)
- {
- parallel_for(size_t(N), [&] ( size_t i ) {
- if (mask & ( 1 << i)) {
- BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
- }
- });
- }
- else
-#endif
- {
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
- collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
- }
- }
- return;
- }
- }
-
- template<int N>
- void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
- {
- if (unlikely(job.ref0.isLeaf())) {
- if (unlikely(job.ref1.isLeaf())) {
- jobs.push_back(job);
- return;
- } else goto recurse_node1;
- } else {
- if (unlikely(job.ref1.isLeaf())) {
- goto recurse_node0;
- } else {
- if (area(job.bounds0) > area(job.bounds1)) {
- goto recurse_node0;
- }
- else {
- goto recurse_node1;
- }
- }
- }
-
- {
- recurse_node0:
- const AABBNode* node0 = job.ref0.getAABBNode();
- size_t mask = overlap<N>(job.bounds1,*node0);
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
- }
- return;
- }
-
- {
- recurse_node1:
- const AABBNode* node1 = job.ref1.getAABBNode();
- size_t mask = overlap<N>(job.bounds0,*node1);
- for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
- jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
- }
- return;
- }
- }
-
- template<int N>
- void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
- {
- CSTAT(bvh_collide_traversal_steps = 0);
- CSTAT(bvh_collide_leaf_pairs = 0);
- CSTAT(bvh_collide_leaf_iterations = 0);
- CSTAT(bvh_collide_prim_intersections1 = 0);
- CSTAT(bvh_collide_prim_intersections2 = 0);
- CSTAT(bvh_collide_prim_intersections3 = 0);
- CSTAT(bvh_collide_prim_intersections4 = 0);
- CSTAT(bvh_collide_prim_intersections5 = 0);
- CSTAT(bvh_collide_prim_intersections = 0);
-#if 0
- collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
-#else
- const int M = 2048;
- jobvector jobs[2];
- jobs[0].reserve(M);
- jobs[1].reserve(M);
- jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
- int source = 0;
- int target = 1;
-
- /* try to split job until job list is full */
- while (jobs[source].size()+8 <= M)
- {
- for (size_t i=0; i<jobs[source].size(); i++)
- {
- const CollideJob& job = jobs[source][i];
- size_t remaining = jobs[source].size()-i;
- if (jobs[target].size()+remaining+8 > M) {
- jobs[target].push_back(job);
- } else {
- split(job,jobs[target]);
- }
- }
-
- /* stop splitting jobs if we reached only leaves and cannot make progress anymore */
- if (jobs[target].size() == jobs[source].size())
- break;
-
- jobs[source].resize(0);
- std::swap(source,target);
- }
-
- /* parallel processing of all jobs */
- parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
- CollideJob& j = jobs[source][i];
- collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
- });
-
-
-#endif
- CSTAT(PRINT(bvh_collide_traversal_steps));
- CSTAT(PRINT(bvh_collide_leaf_pairs));
- CSTAT(PRINT(bvh_collide_leaf_iterations));
- CSTAT(PRINT(bvh_collide_prim_intersections1));
- CSTAT(PRINT(bvh_collide_prim_intersections2));
- CSTAT(PRINT(bvh_collide_prim_intersections3));
- CSTAT(PRINT(bvh_collide_prim_intersections4));
- CSTAT(PRINT(bvh_collide_prim_intersections5));
- CSTAT(PRINT(bvh_collide_prim_intersections));
- }
-
- template<int N>
- void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
- {
- BVHNColliderUserGeom<N>(bvh0->scene,bvh1->scene,callback,userPtr).
- collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds());
- }
-
-#if defined (EMBREE_LOWEST_ISA)
- struct collision_regression_test : public RegressionTest
- {
- collision_regression_test(const char* name) : RegressionTest(name) {
- registerRegressionTest(this);
- }
-
- bool run ()
- {
- bool passed = true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
- Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
- passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
- Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
- return passed;
- }
- };
-
- collision_regression_test collision_regression("collision_regression_test");
-#endif
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Collider Definitions
- ////////////////////////////////////////////////////////////////////////////////
-
- DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);
-
-#if defined(__AVX__)
- DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h
deleted file mode 100644
index ac4f99c96a..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/object.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNCollider
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
-
- struct CollideJob
- {
- CollideJob () {}
-
- CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
- NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
- : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}
-
- NodeRef ref0;
- BBox3fa bounds0;
- size_t depth0;
- NodeRef ref1;
- BBox3fa bounds1;
- size_t depth1;
- };
-
- typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;
-
- void split(const CollideJob& job, jobvector& jobs);
-
- public:
- __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
- : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}
-
- public:
- virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
- void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
- void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);
-
- protected:
- Scene* scene0;
- Scene* scene1;
- RTCCollideFunc callback;
- void* userPtr;
- };
-
- template<int N>
- class BVHNColliderUserGeom : public BVHNCollider<N>
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
-
- __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
- : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}
-
- virtual void processLeaf(NodeRef leaf0, NodeRef leaf1);
- public:
- static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h
deleted file mode 100644
index 54021ca6eb..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../common/isa.h"
-#include "../common/accel.h"
-#include "../common/scene.h"
-#include "../geometry/curve_intersector_virtual.h"
-
-namespace embree
-{
-  /*! BVH instantiations: common base that only provides the two variant
-   *  enumerations below. */
-  class BVHFactory
-  {
-  public:
-
-    /*! selects the build variant */
-    enum class BuildVariant
-    {
-      STATIC,
-      DYNAMIC,
-      HIGH_QUALITY
-    };
-
-    /*! selects the intersector variant */
-    enum class IntersectVariant
-    {
-      FAST,
-      ROBUST
-    };
-  };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp
deleted file mode 100644
index ea6adc2717..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp
+++ /dev/null
@@ -1,330 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_intersector1.h"
-#include "node_intersector1.h"
-#include "bvh_traverser1.h"
-
-#include "../geometry/intersector_iterators.h"
-#include "../geometry/triangle_intersector.h"
-#include "../geometry/trianglev_intersector.h"
-#include "../geometry/trianglev_mb_intersector.h"
-#include "../geometry/trianglei_intersector.h"
-#include "../geometry/quadv_intersector.h"
-#include "../geometry/quadi_intersector.h"
-#include "../geometry/curveNv_intersector.h"
-#include "../geometry/curveNi_intersector.h"
-#include "../geometry/curveNi_mb_intersector.h"
-#include "../geometry/linei_intersector.h"
-#include "../geometry/subdivpatch1_intersector.h"
-#include "../geometry/object_intersector.h"
-#include "../geometry/instance_intersector.h"
-#include "../geometry/subgrid_intersector.h"
-#include "../geometry/subgrid_mb_intersector.h"
-#include "../geometry/curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
-    /*! Single-ray closest-hit traversal of the BVH referenced by This->ptr.
-     *
-     *  Nodes are processed from an explicit stack. Every popped entry whose
-     *  stored distance exceeds the current ray.tfar is skipped; otherwise the
-     *  node is descended until a leaf is reached, whose primitives are handed
-     *  to PrimitiveIntersector1::intersect together with the ray.
-     *
-     *  \param This     intersector set; This->ptr must point to the BVH
-     *  \param ray      ray/hit record passed on to the primitive intersector
-     *  \param context  intersection context passed on to the primitive intersector
-     *
-     *  Returns without doing any work when the BVH is empty and, if
-     *  EMBREE_IGNORE_INVALID_RAYS is defined, when the ray is invalid. */
-    template<int N, int types, bool robust, typename PrimitiveIntersector1>
-    void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
-                                                                              RayHit& __restrict__ ray,
-                                                                              IntersectContext* __restrict__ context)
-    {
-      const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
-      /* we may traverse an empty BVH in case all geometry was invalid */
-      if (bvh->root == BVH::emptyNode)
-        return;
-
-      /* perform per ray precalculations required by the primitive intersector */
-      Precalculations pre(ray, bvh);
-
-      /* stack state: slot 0 is pre-filled with the root, so the stack pointer
-       * starts one past it; the empty-root case was already rejected above,
-       * hence no second check is required here */
-      StackItemT<NodeRef> stack[stackSize];    // stack of nodes
-      StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
-      StackItemT<NodeRef>* stackEnd = stack+stackSize;
-      stack[0].ptr = bvh->root;
-      stack[0].dist = neg_inf;
-
-      /* filter out invalid rays */
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
-      if (!ray.valid()) return;
-#endif
-      /* verify correct input */
-      assert(ray.valid());
-      assert(ray.tnear() >= 0.0f);
-      assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
-
-      /* load the ray into SIMD registers */
-      TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
-
-      /* initialize the node traverser */
-      BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser;
-
-      /* pop loop; the label sits on the loop body so that `goto pop` restarts
-       * the body from inside the nested downtraversal loop */
-      while (true) pop:
-      {
-        /* pop next node */
-        if (unlikely(stackPtr == stack)) break;
-        stackPtr--;
-        NodeRef cur = NodeRef(stackPtr->ptr);
-
-        /* if popped node is too far, pop next one */
-#if defined(__AVX512ER__)
-        /* much faster on KNL */
-        if (unlikely(any(vfloat<Nx>(*(float*)&stackPtr->dist) > tray.tfar)))
-          continue;
-#else
-        if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
-          continue;
-#endif
-
-        /* downtraversal loop */
-        while (true)
-        {
-          /* intersect node; a false return ends the descent and cur is then
-           * handled as a leaf below (the statistics increment is undone) */
-          size_t mask; vfloat<Nx> tNear;
-          STAT3(normal.trav_nodes,1,1,1);
-          bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
-          if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
-
-          /* if no child is hit, pop next node */
-          if (unlikely(mask == 0))
-            goto pop;
-
-          /* select next child and push other children */
-          nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
-        }
-
-        /* this is a leaf node */
-        assert(cur != BVH::emptyNode);
-        STAT3(normal.trav_leaves,1,1,1);
-        size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
-        size_t lazy_node = 0;
-        PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);
-
-        /* re-synchronize the traversal ray's far distance with the ray */
-        tray.tfar = ray.tfar;
-
-        /* push lazy node onto stack */
-        if (unlikely(lazy_node)) {
-          stackPtr->ptr = lazy_node;
-          stackPtr->dist = neg_inf;
-          stackPtr++;
-        }
-      }
-    }
-
- template<int N, int types, bool robust, typename PrimitiveIntersector1> // single-ray occlusion query: traversal stops at the first leaf whose primitive intersector reports occlusion
- void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
- Ray& __restrict__ ray,
- IntersectContext* __restrict__ context)
- {
- const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return;
-
- /* early out for already occluded rays (occlusion is signalled through a negative tfar, see the leaf handling below) */
- if (unlikely(ray.tfar < 0.0f))
- return;
-
- /* perform per ray precalculations required by the primitive intersector */
- Precalculations pre(ray, bvh);
-
- /* stack state: slot 0 is pre-filled with the root, so the stack pointer starts one past it */
- NodeRef stack[stackSize]; // stack of nodes that still need to get traversed
- NodeRef* stackPtr = stack+1; // current stack pointer
- NodeRef* stackEnd = stack+stackSize;
- stack[0] = bvh->root;
-
- /* filter out invalid rays */
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- if (!ray.valid()) return;
-#endif
-
- /* verify correct input */
- assert(ray.valid());
- assert(ray.tnear() >= 0.0f);
- assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
-
- /* load the ray into SIMD registers */
- TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
-
- /* initialize the node traverser */
- BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser;
-
- /* pop loop; the label sits on the loop body so that `goto pop` restarts the body from inside the nested loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = (NodeRef)*stackPtr;
-
- /* downtraversal loop: ends via break when the node intersector returns false, or via goto pop when no child is hit */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<Nx> tNear;
- STAT3(shadow.trav_nodes,1,1,1);
- bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
- if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; } // undo the statistics increment; cur is handled as a leaf below
-
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
-
- /* select next child and push other children */
- nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
- }
-
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(shadow.trav_leaves,1,1,1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
- ray.tfar = neg_inf; // report occlusion to the caller through a negative tfar
- break; // leaves the pop loop, which ends the function
- }
-
- /* push lazy node onto stack */
- if (unlikely(lazy_node)) {
- *stackPtr = (NodeRef)lazy_node;
- stackPtr++;
- }
- }
- }
-
- template<int N, int types, bool robust, typename PrimitiveIntersector1> // generic point query traversal; the specializations that follow this struct switch it off for some primitive intersectors
- struct PointQueryDispatch
- {
- typedef typename PrimitiveIntersector1::Precalculations Precalculations;
- typedef typename PrimitiveIntersector1::Primitive Primitive;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
-
- static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
-
- /* right now AVX512KNL SIMD extension only for standard node types */
- static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N;
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) // returns true if at least one primitive-level pointQuery call returned true
- {
- const BVH* __restrict__ bvh = (const BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return false;
-
- /* stack state: slot 0 is pre-filled with the root, so the stack pointer starts one past it */
- StackItemT<NodeRef> stack[stackSize]; // stack of nodes
- StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
- StackItemT<NodeRef>* stackEnd = stack+stackSize;
- stack[0].ptr = bvh->root;
- stack[0].dist = neg_inf;
-
- /* verify correct input */
- assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));
-
- /* load the point query into SIMD registers */
- TravPointQuery<N> tquery(query->p, context->query_radius);
-
- /* initialize the node traverser (instantiated with N for both widths, Nx is not used in this function) */
- BVHNNodeTraverser1Hit<N, N, types> nodeTraverser;
-
- bool changed = false;
- float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE // culling threshold: squared query radius for sphere queries, otherwise the squared length of context->query_radius
- ? query->radius * query->radius
- : dot(context->query_radius, context->query_radius);
-
- /* pop loop; the label sits on the loop body so that `goto pop` restarts the body from inside the nested loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
-
- /* if popped node is too far, pop next one */
- if (unlikely(*(float*)&stackPtr->dist > cull_radius))
- continue;
-
- /* downtraversal loop: ends via break when the node query returns false, or via goto pop when no child is hit */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<N> tNear;
- STAT3(point_query.trav_nodes,1,1,1);
- bool nodeIntersected;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
- } else {
- nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
- }
- if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; } // undo the statistics increment; cur is handled as a leaf below
-
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
-
- /* select next child and push other children */
- nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
- }
-
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(point_query.trav_leaves,1,1,1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
- {
- changed = true;
- tquery.rad = context->query_radius; // reload radius and culling threshold from query/context; presumably the primitive query may have changed them - confirm
- cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
- ? query->radius * query->radius
- : dot(context->query_radius, context->query_radius);
- }
-
- /* push lazy node onto stack */
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->dist = neg_inf;
- stackPtr++;
- }
- }
- return changed;
- }
- };
-
- /* disable point queries for not yet supported geometry types */
-    /*! Specialization for VirtualCurveIntersector1: point queries are
-     *  disabled, all arguments are ignored and false is returned. */
-    template<int N, int types, bool robust>
-    struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1>
-    {
-      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
-      {
-        return false;
-      }
-    };
-
-    /*! Specialization for SubdivPatch1Intersector1: point queries are
-     *  disabled, all arguments are ignored and false is returned. */
-    template<int N, int types, bool robust>
-    struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1>
-    {
-      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
-      {
-        return false;
-      }
-    };
-
-    /*! Specialization for SubdivPatch1MBIntersector1: point queries are
-     *  disabled, all arguments are ignored and false is returned. */
-    template<int N, int types, bool robust>
-    struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1>
-    {
-      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
-      {
-        return false;
-      }
-    };
-
-    /*! Point query entry point: forwards to the PointQueryDispatch
-     *  instantiation selected by the primitive intersector type and returns
-     *  its result unchanged. */
-    template<int N, int types, bool robust, typename PrimitiveIntersector1>
-    bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
-      const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
-    {
-      typedef PointQueryDispatch<N, types, robust, PrimitiveIntersector1> Dispatch;
-      const bool changed = Dispatch::pointQuery(This, query, context);
-      return changed;
-    }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h
deleted file mode 100644
index 1a269c319a..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-
-namespace embree
-{
- namespace isa
- {
-    /*! BVH single ray intersector. Exposes three static entry points
-     *  (intersect, occluded, pointQuery) that operate on a BVHN<N>. */
-    template<int N, int types, bool robust, typename PrimitiveIntersector1>
-    class BVHNIntersector1
-    {
-    private:
-      /* type shortcuts */
-      using Precalculations = typename PrimitiveIntersector1::Precalculations;
-      using Primitive       = typename PrimitiveIntersector1::Primitive;
-      using BVH             = BVHN<N>;
-      using NodeRef         = typename BVH::NodeRef;
-      using AABBNode        = typename BVH::AABBNode;
-      using AABBNodeMB4D    = typename BVH::AABBNodeMB4D;
-
-      /* traversal stack capacity; the extra 3 entries are due to a 16-wide store */
-      static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3;
-
-      /* SIMD width used for node tests: currently the AVX512KNL extension
-       * (vextend<N>::size) applies to the standard node types only */
-      static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N;
-
-    public:
-      static void intersect (const Accel::Intersectors* This, RayHit& ray, IntersectContext* context);
-      static void occluded (const Accel::Intersectors* This, Ray& ray, IntersectContext* context);
-      static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
-    };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp
deleted file mode 100644
index 989f7354fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_intersector1.cpp"
-
-namespace embree
-{
- namespace isa
- {
-    /*! returns the value reported by VerifyMultiTargetLinking::getISA() */
-    int getISA()
-    {
-      const int isa = VerifyMultiTargetLinking::getISA();
-      return isa;
-    }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// BVH4Intersector1 Definitions
- ////////////////////////////////////////////////////////////////////////////////
- /* Every entry names one BVHNIntersector1<N, types, robust, PrimitiveIntersector1> instantiation with N = 4. DEFINE_INTERSECTOR1, COMMA and the IF_ENABLED_* wrappers are macros defined elsewhere; presumably each wrapper drops its entry when the matching geometry type is disabled - confirm. */
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));
- /* same two curve/point entries with robust = true */
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
- IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));
- /* triangles on BVH_AN1 nodes: Moeller entries use robust = false, Pluecker entries use robust = true */
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- /* triangle MB entries on BVH_AN2_AN4D nodes */
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- /* quads on BVH_AN1 nodes */
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
- /* quad MB entries on BVH_AN2_AN4D nodes */
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));
- /* subdivision patches (both entries use robust = true) */
- IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
- IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));
- /* user geometry via ObjectIntersector1<false> and ObjectIntersector1<true> */
- IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
- IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));
- /* instances */
- IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
- IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));
- /* entries on BVH_QN1 nodes: Pluecker primitive intersectors combined with robust = false */
- IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >));
- IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
- /* grids */
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
- /* NOTE(review): BVH4GridMBIntersector1Moeller above is instantiated with SubGridMBIntersector1Pluecker and robust = true, while the dedicated BVH4GridMBIntersector1Pluecker entry below is commented out - confirm this is intended */
- IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
- //IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h
deleted file mode 100644
index d764cc928d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/stack_item.h"
-#include "node_intersector_frustum.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int K, bool robust>
- struct TravRayK;
-
-    /*! BVH hybrid packet intersector for packets of K rays. Switches between
-     *  packet and single ray traversal (optional, see the `single` template
-     *  argument). */
-    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
-    class BVHNIntersectorKHybrid
-    {
-      /* SIMD width used for node tests: currently the AVX512KNL extension
-       * (vextend<N>::size) applies to the standard node type only */
-      static const size_t Nx = types == BVH_AN1 ? vextend<N>::size : N;
-
-      /* type shortcuts */
-      using Precalculations = typename PrimitiveIntersectorK::Precalculations;
-      using Primitive       = typename PrimitiveIntersectorK::Primitive;
-      using BVH             = BVHN<N>;
-      using NodeRef         = typename BVH::NodeRef;
-      using BaseNode        = typename BVH::BaseNode;
-      using AABBNode        = typename BVH::AABBNode;
-
-      /* stack capacities; the single ray stack gets 3 extra entries due to a 16-wide store */
-      static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3;
-      static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;
-
-      /* threshold constant chosen per packet width K (and per N for K == 8); 0 for any other K */
-      static const size_t switchThresholdIncoherent =
-        (K==4)  ? 3 :
-        (K==8)  ? ((N==4) ? 5 : 7) :
-        (K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
-        0;
-
-    private:
-      /* per-ray helpers operating on ray k of the packet, starting at root */
-      static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
-                             RayHitK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context);
-      static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
-                            RayK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context);
-
-    public:
-      /* packet entry points */
-      static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context);
-      static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context);
-
-      /* packet entry points of the coherent variant */
-      static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, IntersectContext* context);
-      static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, IntersectContext* context);
-
-    };
-
-    /*! BVH packet intersector: the hybrid intersector with its `single`
-     *  template argument fixed to false. Adds no members of its own. */
-    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
-    class BVHNIntersectorKChunk
-      : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false>
-    {
-    };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h
deleted file mode 100644
index 83d1fb4d3d..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector_packet_stream.h"
-#include "node_intersector_frustum.h"
-#include "bvh_traverser_stream.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! BVH ray stream intersector. Declares the stream entry points and defines the inline packet/frustum helpers used for node tests. */
- template<int N, int Nx, int types, bool robust, typename PrimitiveIntersector>
- class BVHNIntersectorStream
- {
- static const int Nxd = (Nx == N) ? N : Nx/2; // N when Nx equals N, otherwise half of Nx
-
- /* shortcuts for frequently used types */
- template<int K> using PrimitiveIntersectorK = typename PrimitiveIntersector::template Type<K>;
- template<int K> using PrimitiveK = typename PrimitiveIntersectorK<K>::PrimitiveK;
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::BaseNode BaseNode;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
- /*! Constructs one TravRayKStream per input packet into packets[], initializes frustum from the min/max reciprocal directions, origins and distances gathered over the packets, sets commonOctant, and returns a bit mask with bit (i*K + lane) set for every valid ray. */
- template<int K>
- __forceinline static size_t initPacketsAndFrustum(RayK<K>** inputPackets, size_t numOctantRays,
- TravRayKStream<K, robust>* packets, Frustum<robust>& frustum, bool& commonOctant)
- {
- const size_t numPackets = (numOctantRays+K-1)/K; // number of K-wide packets needed for numOctantRays rays (rounded up)
-
- Vec3vf<K> tmp_min_rdir(pos_inf);
- Vec3vf<K> tmp_max_rdir(neg_inf);
- Vec3vf<K> tmp_min_org(pos_inf);
- Vec3vf<K> tmp_max_org(neg_inf);
- vfloat<K> tmp_min_dist(pos_inf);
- vfloat<K> tmp_max_dist(neg_inf);
-
- size_t m_active = 0;
- for (size_t i = 0; i < numPackets; i++)
- {
- const vfloat<K> tnear = inputPackets[i]->tnear();
- const vfloat<K> tfar = inputPackets[i]->tfar;
- vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f); // a lane is valid when its [tnear,tfar] interval is non-empty and tnear is non-negative
-
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- m_valid &= inputPackets[i]->valid();
-#endif
-
- m_active |= (size_t)movemask(m_valid) << (i*K);
-
- vfloat<K> packet_min_dist = max(tnear, 0.0f);
- vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf); // invalid lanes get neg_inf as far distance
- tmp_min_dist = min(tmp_min_dist, packet_min_dist);
- tmp_max_dist = max(tmp_max_dist, packet_max_dist);
-
- const Vec3vf<K>& org = inputPackets[i]->org;
- const Vec3vf<K>& dir = inputPackets[i]->dir;
-
- new (&packets[i]) TravRayKStream<K, robust>(org, dir, packet_min_dist, packet_max_dist);
- /* accumulate min/max of rdir and origin over valid lanes only; invalid lanes contribute the neutral element */
- tmp_min_rdir = min(tmp_min_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(pos_inf)));
- tmp_max_rdir = max(tmp_max_rdir, select(m_valid, packets[i].rdir, Vec3vf<K>(neg_inf)));
- tmp_min_org = min(tmp_min_org , select(m_valid,org , Vec3vf<K>(pos_inf)));
- tmp_max_org = max(tmp_max_org , select(m_valid,org , Vec3vf<K>(neg_inf)));
- }
- /* clear the bits of lanes at or beyond numOctantRays; the full-width case is handled separately so that no shift by the full bit width of size_t is performed */
- m_active &= (numOctantRays == (8 * sizeof(size_t))) ? (size_t)-1 : (((size_t)1 << numOctantRays)-1);
-
-
- const Vec3fa reduced_min_rdir(reduce_min(tmp_min_rdir.x),
- reduce_min(tmp_min_rdir.y),
- reduce_min(tmp_min_rdir.z));
-
- const Vec3fa reduced_max_rdir(reduce_max(tmp_max_rdir.x),
- reduce_max(tmp_max_rdir.y),
- reduce_max(tmp_max_rdir.z));
-
- const Vec3fa reduced_min_origin(reduce_min(tmp_min_org.x),
- reduce_min(tmp_min_org.y),
- reduce_min(tmp_min_org.z));
-
- const Vec3fa reduced_max_origin(reduce_max(tmp_max_org.x),
- reduce_max(tmp_max_org.y),
- reduce_max(tmp_max_org.z));
- /* true when, on every axis, the reciprocal directions of all valid rays are either all negative or all non-negative */
- commonOctant =
- (reduced_max_rdir.x < 0.0f || reduced_min_rdir.x >= 0.0f) &&
- (reduced_max_rdir.y < 0.0f || reduced_min_rdir.y >= 0.0f) &&
- (reduced_max_rdir.z < 0.0f || reduced_min_rdir.z >= 0.0f);
-
- const float frustum_min_dist = reduce_min(tmp_min_dist);
- const float frustum_max_dist = reduce_max(tmp_max_dist);
-
- frustum.init(reduced_min_origin, reduced_max_origin,
- reduced_min_rdir, reduced_max_rdir,
- frustum_min_dist, frustum_max_dist,
- N);
-
- return m_active;
- }
- /*! Tests child box boxID of node against every packet from the first to the last packet that contains an active ray and returns the combined hit mask (packet i contributes bits starting at i*K). The result is not masked with m_active here. */
- template<int K>
- __forceinline static size_t intersectAABBNodePacket(size_t m_active,
- const TravRayKStream<K,robust>* packets,
- const AABBNode* __restrict__ node,
- size_t boxID,
- const NearFarPrecalculations& nf)
- {
- assert(m_active);
- const size_t startPacketID = bsf(m_active) / K;
- const size_t endPacketID = bsr(m_active) / K;
- size_t m_trav_active = 0;
- for (size_t i = startPacketID; i <= endPacketID; i++)
- {
- const size_t m_hit = intersectNodeK<N>(node, boxID, packets[i], nf);
- m_trav_active |= m_hit << (i*K);
- }
- return m_trav_active;
- }
- /*! Intersects node with the frustum and with the first active ray. Children on which both tests agree are not re-tested; every other child is tested per packet, its active-ray mask is stored in maskK[boxID], and its bit in the returned child mask is toggled when no active ray hits it. */
- template<int K>
- __forceinline static size_t traverseCoherentStream(size_t m_active,
- TravRayKStream<K, robust>* packets,
- const AABBNode* __restrict__ node,
- const Frustum<robust>& frustum,
- size_t* maskK,
- vfloat<Nx>& dist)
- {
- size_t m_node_hit = intersectNodeFrustum<N,Nx>(node, frustum, dist);
- const size_t first_index = bsf(m_active);
- const size_t first_packetID = first_index / K;
- const size_t first_rayID = first_index % K;
- size_t m_first_hit = intersectNode1<N,Nx>(node, packets[first_packetID], first_rayID, frustum.nf);
-
- /* this makes traversal independent of the ordering of rays */
- size_t m_node = m_node_hit ^ m_first_hit;
- while (unlikely(m_node))
- {
- const size_t boxID = bscf(m_node);
- const size_t m_current = m_active & intersectAABBNodePacket(m_active, packets, node, boxID, frustum.nf);
- m_node_hit ^= m_current ? (size_t)0 : ((size_t)1 << boxID);
- maskK[boxID] = m_current;
- }
- return m_node_hit;
- }
- /*! Fast-packet overload: performs a slab test of the child boxes of node for every active ray and ORs shiftTable[rayID] into the lanes of the children that the ray hits; returns the resulting per-child masks. */
- // TODO: explicit 16-wide path for KNL
- template<int K>
- __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active,
- TravRayKStreamFast<K>* __restrict__ packets,
- const AABBNode* __restrict__ node,
- const NearFarPrecalculations& nf,
- const int shiftTable[32])
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); // near/far planes are loaded at the byte offsets stored in nf, relative to node->lower_x
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
- assert(m_active);
- vint<Nx> vmask(zero);
- do
- {
- STAT3(shadow.trav_nodes,1,1,1);
- const size_t rayID = bscf(m_active);
- assert(rayID < MAX_INTERNAL_STREAM_SIZE);
- TravRayKStream<K,robust> &p = packets[rayID / K];
- const size_t i = rayID % K;
- const vint<Nx> bitmask(shiftTable[rayID]); // presumably the single bit identifying rayID - confirm against the table passed by the caller
- /* slab distances; the aarch64 path uses madd with neg_org_rdir, the other path msub with org_rdir */
-#if defined (__aarch64__)
- const vfloat<Nx> tNearX = madd(bminX, p.rdir.x[i], p.neg_org_rdir.x[i]);
- const vfloat<Nx> tNearY = madd(bminY, p.rdir.y[i], p.neg_org_rdir.y[i]);
- const vfloat<Nx> tNearZ = madd(bminZ, p.rdir.z[i], p.neg_org_rdir.z[i]);
- const vfloat<Nx> tFarX = madd(bmaxX, p.rdir.x[i], p.neg_org_rdir.x[i]);
- const vfloat<Nx> tFarY = madd(bmaxY, p.rdir.y[i], p.neg_org_rdir.y[i]);
- const vfloat<Nx> tFarZ = madd(bmaxZ, p.rdir.z[i], p.neg_org_rdir.z[i]);
-#else
- const vfloat<Nx> tNearX = msub(bminX, p.rdir.x[i], p.org_rdir.x[i]);
- const vfloat<Nx> tNearY = msub(bminY, p.rdir.y[i], p.org_rdir.y[i]);
- const vfloat<Nx> tNearZ = msub(bminZ, p.rdir.z[i], p.org_rdir.z[i]);
- const vfloat<Nx> tFarX = msub(bmaxX, p.rdir.x[i], p.org_rdir.x[i]);
- const vfloat<Nx> tFarY = msub(bmaxY, p.rdir.y[i], p.org_rdir.y[i]);
- const vfloat<Nx> tFarZ = msub(bmaxZ, p.rdir.z[i], p.org_rdir.z[i]);
-#endif
- /* clamp the slab interval to the ray's own [tnear, tfar] */
- const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i]));
- const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i]));
- /* a child is hit when tNear <= tFar; the three paths below only differ in how bitmask is merged into vmask */
-#if defined(__AVX512ER__)
- const vboolx m_node((1 << N)-1);
- const vbool<Nx> hit_mask = le(m_node, tNear, tFar);
- vmask = mask_or(hit_mask, vmask, vmask, bitmask);
-#else
- const vbool<Nx> hit_mask = tNear <= tFar;
-#if defined(__AVX2__)
- vmask = vmask | (bitmask & vint<Nx>(hit_mask));
-#else
- vmask = select(hit_mask, vmask | bitmask, vmask);
-#endif
-#endif
- } while(m_active);
- return vmask;
- }
- /*! Robust-packet overload: same per-ray slab test, but computed as (plane - org) * rdir and compared after scaling tNear by (1 - 2*ulp) and tFar by (1 + 2*ulp). */
- template<int K>
- __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active,
- TravRayKStreamRobust<K>* __restrict__ packets,
- const AABBNode* __restrict__ node,
- const NearFarPrecalculations& nf,
- const int shiftTable[32])
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); // near/far planes are loaded at the byte offsets stored in nf, relative to node->lower_x
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
- assert(m_active);
- vint<Nx> vmask(zero);
- do
- {
- STAT3(shadow.trav_nodes,1,1,1);
- const size_t rayID = bscf(m_active);
- assert(rayID < MAX_INTERNAL_STREAM_SIZE);
- TravRayKStream<K,robust> &p = packets[rayID / K];
- const size_t i = rayID % K;
- const vint<Nx> bitmask(shiftTable[rayID]); // presumably the single bit identifying rayID - confirm against the table passed by the caller
- const vfloat<Nx> tNearX = (bminX - p.org.x[i]) * p.rdir.x[i];
- const vfloat<Nx> tNearY = (bminY - p.org.y[i]) * p.rdir.y[i];
- const vfloat<Nx> tNearZ = (bminZ - p.org.z[i]) * p.rdir.z[i];
- const vfloat<Nx> tFarX = (bmaxX - p.org.x[i]) * p.rdir.x[i];
- const vfloat<Nx> tFarY = (bmaxY - p.org.y[i]) * p.rdir.y[i];
- const vfloat<Nx> tFarZ = (bmaxZ - p.org.z[i]) * p.rdir.z[i];
- const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i]));
- const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i]));
- const float round_down = 1.0f-2.0f*float(ulp); // scale factors applied to tNear and tFar before the comparison
- const float round_up = 1.0f+2.0f*float(ulp);
-#if defined(__AVX512ER__)
- const vboolx m_node((1 << N)-1);
- const vbool<Nx> hit_mask = le(m_node, round_down*tNear, round_up*tFar);
- vmask = mask_or(hit_mask, vmask, vmask, bitmask);
-#else
- const vbool<Nx> hit_mask = round_down*tNear <= round_up*tFar;
-#if defined(__AVX2__)
- vmask = vmask | (bitmask & vint<Nx>(hit_mask));
-#else
- vmask = select(hit_mask, vmask | bitmask, vmask);
-#endif
-#endif
- } while(m_active);
- return vmask;
- }
-
-
- static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth;
- /* public stream entry points (defined outside this class body) */
- public:
- static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context);
- static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context);
- /* K-wide implementations; only declared here */
- private:
- template<int K>
- static void intersectCoherent(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedCoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedIncoherent(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
- };
-
-
- /*! BVH ray stream intersector with direct fallback to packets. Exposes the same intersect/occluded stream signatures as BVHNIntersectorStream; only declarations are given here. */
- template<int N, int Nx>
- class BVHNIntersectorStreamPacketFallback
- {
- public:
- static void intersect(Accel::Intersectors* This, RayHitN** inputRays, size_t numRays, IntersectContext* context);
- static void occluded (Accel::Intersectors* This, RayN** inputRays, size_t numRays, IntersectContext* context);
- /* K-wide packet helpers; presumably the public entry points dispatch to these - confirm in the implementation file */
- private:
- template<int K>
- static void intersectK(Accel::Intersectors* This, RayHitK<K>** inputRays, size_t numRays, IntersectContext* context);
-
- template<int K>
- static void occludedK(Accel::Intersectors* This, RayK<K>** inputRays, size_t numRays, IntersectContext* context);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h
deleted file mode 100644
index cdeb923637..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/ray.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- namespace isa
- {
- class RayStreamFilter
- {
- public:
- static void intersectAOS(Scene* scene, RTCRayHit* rays, size_t N, size_t stride, IntersectContext* context);
- static void intersectAOP(Scene* scene, RTCRayHit** rays, size_t N, IntersectContext* context);
- static void intersectSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
- static void intersectSOP(Scene* scene, const RTCRayHitNp* rays, size_t N, IntersectContext* context);
-
- static void occludedAOS(Scene* scene, RTCRay* rays, size_t N, size_t stride, IntersectContext* context);
- static void occludedAOP(Scene* scene, RTCRay** rays, size_t N, IntersectContext* context);
- static void occludedSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
- static void occludedSOP(Scene* scene, const RTCRayNp* rays, size_t N, IntersectContext* context);
-
- private:
- template<int K, bool intersect>
- static void filterAOS(Scene* scene, void* rays, size_t N, size_t stride, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterAOP(Scene* scene, void** rays, size_t N, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterSOA(Scene* scene, char* rays, size_t N, size_t numPackets, size_t stride, IntersectContext* context);
-
- template<int K, bool intersect>
- static void filterSOP(Scene* scene, const void* rays, size_t N, IntersectContext* context);
- };
- }
-};
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h
deleted file mode 100644
index baa4a8d805..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h
+++ /dev/null
@@ -1,213 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! BVHN AABBNode */
- template<typename NodeRef, int N>
- struct AABBNode_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
- {
- AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
- node.getAABBNode()->setRef(i,child);
- node.getAABBNode()->setBounds(i,bounds);
- }
- };
-
- struct Create2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
- {
- AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear();
- for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds());
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set2
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
- {
- AABBNode_t* node = ref.getAABBNode();
- for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
- return ref;
- }
- };
-
- struct Set3
- {
- Set3 (FastAllocator* allocator, PrimRef* prims)
- : allocator(allocator), prims(prims) {}
-
- template<typename BuildRecord>
- __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
- {
- AABBNode_t* node = ref.getAABBNode();
- for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
-
- if (unlikely(precord.alloc_barrier))
- {
- PrimRef* begin = &prims[precord.prims.begin()];
- PrimRef* end = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!!
- size_t bytes = (size_t)end - (size_t)begin;
- allocator->addBlock(begin,bytes);
- }
-
- return ref;
- }
-
- FastAllocator* const allocator;
- PrimRef* const prims;
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_x = lower_y = lower_z = pos_inf;
- upper_x = upper_y = upper_z = neg_inf;
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const BBox3fa& bounds)
- {
- assert(i < N);
- lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
- upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
- setBounds(i,bounds);
- children[i] = ref;
- }
-
- /*! Returns bounds of node. */
- __forceinline BBox3fa bounds() const {
- const Vec3fa lower(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z));
- const Vec3fa upper(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z));
- return BBox3fa(lower,upper);
- }
-
- /*! Returns bounds of specified child. */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]);
- const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]);
- return BBox3fa(lower,upper);
- }
-
- /*! Returns extent of bounds of specified child. */
- __forceinline Vec3fa extend(size_t i) const {
- return bounds(i).size();
- }
-
- /*! Returns bounds of all children (implemented later as specializations) */
- __forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;
-
- /*! swap two children of the node */
- __forceinline void swap(size_t i, size_t j)
- {
- assert(i<N && j<N);
- std::swap(children[i],children[j]);
- std::swap(lower_x[i],lower_x[j]);
- std::swap(lower_y[i],lower_y[j]);
- std::swap(lower_z[i],lower_z[j]);
- std::swap(upper_x[i],upper_x[j]);
- std::swap(upper_y[i],upper_y[j]);
- std::swap(upper_z[i],upper_z[j]);
- }
-
- /*! swap the children of two nodes */
- __forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
- {
- assert(i<N && j<N);
- std::swap(a->children[i],b->children[j]);
- std::swap(a->lower_x[i],b->lower_x[j]);
- std::swap(a->lower_y[i],b->lower_y[j]);
- std::swap(a->lower_z[i],b->lower_z[j]);
- std::swap(a->upper_x[i],b->upper_x[j]);
- std::swap(a->upper_y[i],b->upper_y[j]);
- std::swap(a->upper_z[i],b->upper_z[j]);
- }
-
- /*! compacts a node (moves empty children to the end) */
- __forceinline static void compact(AABBNode_t* a)
- {
- /* find right most filled node */
- ssize_t j=N;
- for (j=j-1; j>=0; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
-
- /* replace empty nodes with filled nodes */
- for (ssize_t i=0; i<j; i++) {
- if (a->child(i) == NodeRef::emptyNode) {
- a->swap(i,j);
- for (j=j-1; j>i; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
- }
- }
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! output operator */
- friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
- {
- o << "AABBNode { " << embree_endl;
- o << " lower_x " << n.lower_x << embree_endl;
- o << " upper_x " << n.upper_x << embree_endl;
- o << " lower_y " << n.lower_y << embree_endl;
- o << " upper_y " << n.upper_y << embree_endl;
- o << " lower_z " << n.lower_z << embree_endl;
- o << " upper_z " << n.upper_z << embree_endl;
- o << " children = ";
- for (size_t i=0; i<N; i++) o << n.children[i] << " ";
- o << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
-
- public:
- vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
- };
-
- template<>
- __forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
- transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
- transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h
deleted file mode 100644
index 501f4bce5b..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! Motion Blur AABBNode */
- template<typename NodeRef, int N>
- struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- struct Create
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
- {
- AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- template<typename BuildRecord>
- __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
- {
- AABBNodeMB_t* node = ref.getAABBNodeMB();
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<num; i++) {
- node->setRef(i,children[i].ref);
- node->setBounds(i,children[i].lbounds);
- bounds.extend(children[i].lbounds);
- }
- return NodeRecordMB(ref,bounds);
- }
- };
-
- struct SetTimeRange
- {
- __forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}
-
- template<typename BuildRecord>
- __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
- {
- AABBNodeMB_t* node = ref.getAABBNodeMB();
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<num; i++) {
- node->setRef(i, children[i].ref);
- node->setBounds(i, children[i].lbounds, tbounds);
- bounds.extend(children[i].lbounds);
- }
- return NodeRecordMB(ref,bounds);
- }
-
- BBox1f tbounds;
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
- upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
- lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
- upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, NodeRef ref) {
- children[i] = ref;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
- {
- /*! for empty bounds we have to avoid inf-inf=nan */
- BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
- BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));
- bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
- bounds1 = bounds1.enlarge_by(4.0f*float(ulp));
- Vec3fa dlower = bounds1.lower-bounds0.lower;
- Vec3fa dupper = bounds1.upper-bounds0.upper;
-
- lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
- upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
-
- lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
- upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
- setBounds(i, bounds.bounds0, bounds.bounds1);
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
- setBounds(i, bounds.global(tbounds));
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
- lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
- upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
- children[i] = ref;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRecordMB4D& child)
- {
- setRef(i, child.ref);
- setBounds(i, child.lbounds, child.dt);
- }
-
- /*! Return bounding box for time 0 */
- __forceinline BBox3fa bounds0(size_t i) const {
- return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
- Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
- }
-
- /*! Return bounding box for time 1 */
- __forceinline BBox3fa bounds1(size_t i) const {
- return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
- Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
- }
-
- /*! Returns bounds of node. */
- __forceinline BBox3fa bounds() const {
- return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
- reduce_min(min(lower_y,lower_y+lower_dy)),
- reduce_min(min(lower_z,lower_z+lower_dz))),
- Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
- reduce_max(max(upper_y,upper_y+upper_dy)),
- reduce_max(max(upper_z,upper_z+upper_dz))));
- }
-
- /*! Return bounding box of child i */
- __forceinline BBox3fa bounds(size_t i) const {
- return merge(bounds0(i),bounds1(i));
- }
-
- /*! Return linear bounding box of child i */
- __forceinline LBBox3fa lbounds(size_t i) const {
- return LBBox3fa(bounds0(i),bounds1(i));
- }
-
- /*! Return bounding box of child i at specified time */
- __forceinline BBox3fa bounds(size_t i, float time) const {
- return lerp(bounds0(i),bounds1(i),time);
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i) const {
- return lbounds(i).expectedHalfArea();
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
- return lbounds(i).expectedHalfArea(t0t1);
- }
-
- /*! swap two children of the node */
- __forceinline void swap(size_t i, size_t j)
- {
- assert(i<N && j<N);
- std::swap(children[i],children[j]);
-
- std::swap(lower_x[i],lower_x[j]);
- std::swap(upper_x[i],upper_x[j]);
- std::swap(lower_y[i],lower_y[j]);
- std::swap(upper_y[i],upper_y[j]);
- std::swap(lower_z[i],lower_z[j]);
- std::swap(upper_z[i],upper_z[j]);
-
- std::swap(lower_dx[i],lower_dx[j]);
- std::swap(upper_dx[i],upper_dx[j]);
- std::swap(lower_dy[i],lower_dy[j]);
- std::swap(upper_dy[i],upper_dy[j]);
- std::swap(lower_dz[i],lower_dz[j]);
- std::swap(upper_dz[i],upper_dz[j]);
- }
-
- /*! compacts a node (moves empty children to the end) */
- __forceinline static void compact(AABBNodeMB_t* a)
- {
- /* find right most filled node */
- ssize_t j=N;
- for (j=j-1; j>=0; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
-
- /* replace empty nodes with filled nodes */
- for (ssize_t i=0; i<j; i++) {
- if (a->child(i) == NodeRef::emptyNode) {
- a->swap(i,j);
- for (j=j-1; j>i; j--)
- if (a->child(j) != NodeRef::emptyNode)
- break;
- }
- }
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! stream output operator */
- friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
- {
- cout << "AABBNodeMB {" << embree_endl;
- for (size_t i=0; i<N; i++)
- {
- const BBox3fa b0 = n.bounds0(i);
- const BBox3fa b1 = n.bounds1(i);
- cout << " child" << i << " { " << embree_endl;
- cout << " bounds0 = " << b0 << ", " << embree_endl;
- cout << " bounds1 = " << b1 << ", " << embree_endl;
- cout << " }";
- }
- cout << "}";
- return cout;
- }
-
- public:
- vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
-
- vfloat<N> lower_dx; //!< X dimension of lower bounds of all N children.
- vfloat<N> upper_dx; //!< X dimension of upper bounds of all N children.
- vfloat<N> lower_dy; //!< Y dimension of lower bounds of all N children.
- vfloat<N> upper_dy; //!< Y dimension of upper bounds of all N children.
- vfloat<N> lower_dz; //!< Z dimension of lower bounds of all N children.
- vfloat<N> upper_dz; //!< Z dimension of upper bounds of all N children.
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h
deleted file mode 100644
index e968bbbc39..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_aabb_mb.h"
-
-namespace embree
-{
- /*! Aligned 4D Motion Blur Node */
- template<typename NodeRef, int N>
- struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
- using AABBNodeMB_t<NodeRef,N>::set;
-
- typedef BVHNodeRecord<NodeRef> NodeRecord;
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- struct Create
- {
- template<typename BuildRecord>
- __forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
- {
- if (hasTimeSplits)
- {
- AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- else
- {
- AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- }
- };
-
- struct Set
- {
- template<typename BuildRecord>
- __forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
- {
- if (likely(ref.isAABBNodeMB())) {
- for (size_t i=0; i<num; i++)
- ref.getAABBNodeMB()->set(i, children[i]);
- } else {
- for (size_t i=0; i<num; i++)
- ref.getAABBNodeMB4D()->set(i, children[i]);
- }
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear() {
- lower_t = vfloat<N>(pos_inf);
- upper_t = vfloat<N>(neg_inf);
- AABBNodeMB_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box of child. */
- __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
- {
- AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
- lower_t[i] = tbounds.lower;
- upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper;
- }
-
- /*! Sets bounding box and ID of child. */
- __forceinline void set(size_t i, const NodeRecordMB4D& child) {
- AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
- setBounds(i, child.lbounds, child.dt);
- }
-
- /*! Returns the expected surface area when randomly sampling the time. */
- __forceinline float expectedHalfArea(size_t i) const {
- return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i));
- }
-
- /*! returns time range for specified child */
- __forceinline BBox1f timeRange(size_t i) const {
- return BBox1f(lower_t[i],upper_t[i]);
- }
-
- /*! stream output operator */
- friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
- {
- cout << "AABBNodeMB4D {" << embree_endl;
- for (size_t i=0; i<N; i++)
- {
- const BBox3fa b0 = n.bounds0(i);
- const BBox3fa b1 = n.bounds1(i);
- cout << " child" << i << " { " << embree_endl;
- cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl;
- cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl;
- cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl;
- cout << " }";
- }
- cout << "}";
- return cout;
- }
-
- public:
- vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
- vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h
deleted file mode 100644
index 8268f3b932..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_ref.h"
-
-namespace embree
-{
-
- /*! BVHN Base Node */
- template<typename NodeRef, int N>
- struct BaseNode_t
- {
- /*! Clears the node. */
- __forceinline void clear()
- {
- for (size_t i=0; i<N; i++)
- children[i] = NodeRef::emptyNode;
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! verifies the node */
- __forceinline bool verify() const
- {
- for (size_t i=0; i<N; i++) {
- if (child(i) == NodeRef::emptyNode) {
- for (; i<N; i++) {
- if (child(i) != NodeRef::emptyNode)
- return false;
- }
- break;
- }
- }
- return true;
- }
-
- NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf)
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h
deleted file mode 100644
index fa7cc08211..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! Node with unaligned bounds */
- template<typename NodeRef, int N>
- struct OBBNode_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
- {
- OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
- node.ungetAABBNode()->setRef(i,child);
- node.ungetAABBNode()->setBounds(i,bounds);
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear()
- {
- naabb.l.vx = Vec3fa(nan);
- naabb.l.vy = Vec3fa(nan);
- naabb.l.vz = Vec3fa(nan);
- naabb.p = Vec3fa(nan);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets bounding box. */
- __forceinline void setBounds(size_t i, const OBBox3fa& b)
- {
- assert(i < N);
-
- AffineSpace3fa space = b.space;
- space.p -= b.bounds.lower;
- space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space;
-
- naabb.l.vx.x[i] = space.l.vx.x;
- naabb.l.vx.y[i] = space.l.vx.y;
- naabb.l.vx.z[i] = space.l.vx.z;
-
- naabb.l.vy.x[i] = space.l.vy.x;
- naabb.l.vy.y[i] = space.l.vy.y;
- naabb.l.vy.z[i] = space.l.vy.z;
-
- naabb.l.vz.x[i] = space.l.vz.x;
- naabb.l.vz.y[i] = space.l.vz.y;
- naabb.l.vz.z[i] = space.l.vz.z;
-
- naabb.p.x[i] = space.p.x;
- naabb.p.y[i] = space.p.y;
- naabb.p.z[i] = space.p.z;
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Returns the extent of the bounds of the ith child */
- __forceinline Vec3fa extent(size_t i) const {
- assert(i<N);
- const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
- const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
- const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
- return rsqrt(vx*vx + vy*vy + vz*vz);
- }
-
- /*! Returns reference to specified child */
- __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
- __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
-
- /*! output operator */
- friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
- {
- o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
- return o;
- }
-
- public:
- AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h
deleted file mode 100644
index 834cf5ec28..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- template<typename NodeRef, int N>
- struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
- {
- using BaseNode_t<NodeRef,N>::children;
-
- struct Create
- {
- __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
- {
- OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
- return NodeRef::encodeNode(node);
- }
- };
-
- struct Set
- {
- __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const {
- node.ungetAABBNodeMB()->setRef(i,child);
- node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt));
- }
- };
-
- /*! Clears the node. */
- __forceinline void clear()
- {
- space0 = one;
- //b0.lower = b0.upper = Vec3fa(nan);
- b1.lower = b1.upper = Vec3fa(nan);
- BaseNode_t<NodeRef,N>::clear();
- }
-
- /*! Sets space and bounding boxes. */
- __forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
- setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
- }
-
- /*! Sets space and bounding boxes. */
- __forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
- {
- assert(i < N);
-
- AffineSpace3fa space = s0;
- space.p -= a.lower;
- Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower);
- space = AffineSpace3fa::scale(scale)*space;
- BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale);
- BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);
-
- space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
- space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
- space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
- space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z;
-
- /*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z;
- b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/
-
- b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
- b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
- }
-
- /*! Sets ID of child. */
- __forceinline void setRef(size_t i, const NodeRef& ref) {
- assert(i < N);
- children[i] = ref;
- }
-
- /*! Returns the extent of the bounds of the ith child */
- __forceinline Vec3fa extent0(size_t i) const {
- assert(i < N);
- const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
- const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
- const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
- return rsqrt(vx*vx + vy*vy + vz*vz);
- }
-
- public:
- AffineSpace3vf<N> space0;
- //BBox3vf<N> b0; // these are the unit bounds
- BBox3vf<N> b1;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h
deleted file mode 100644
index 5212821f3f..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h
+++ /dev/null
@@ -1,265 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh_node_base.h"
-
-namespace embree
-{
- /*! BVHN Quantized Node.
-  *
-  *  Stores the bounds of N children as 8-bit values per plane together with
-  *  one start/scale pair per dimension. A quantized value q is decoded with
-  *  madd(scale,q,start), see bounds() and the dequantize*() helpers. */
- template<int N>
- struct __aligned(8) QuantizedBaseNode_t
- {
- typedef unsigned char T;
- static const T MIN_QUAN = 0; // smallest quantized value
- static const T MAX_QUAN = 255; // largest quantized value
-
- /*! Clears the node. Every child gets lower=MAX_QUAN and upper=MIN_QUAN,
-  *  i.e. lower > upper, which validMask() reports as not valid. */
- __forceinline void clear() {
- for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
- for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
- }
-
- /*! Returns the dequantized bounds of the specified child (i < N). */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
- madd(scale.y,(float)lower_y[i],start.y),
- madd(scale.z,(float)lower_z[i],start.z));
- const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
- madd(scale.y,(float)upper_y[i],start.y),
- madd(scale.z,(float)upper_z[i],start.z));
- return BBox3fa(lower,upper);
- }
-
- /*! Returns extent of the dequantized bounds of specified child. */
- __forceinline Vec3fa extent(size_t i) const {
- return bounds(i).size();
- }
- /*! Quantizes the bounds of all N children along one dimension.
-  *
-  *  \param lower       lower bounds of the children; a lane equal to pos_inf marks an unused child
-  *  \param upper       upper bounds of the children
-  *  \param lower_quant receives the 8-bit lower bounds
-  *  \param upper_quant receives the 8-bit upper bounds
-  *  \param start       receives the decode offset (minimum of all lower bounds)
-  *  \param scale       receives the decode scale
-  *
-  *  The DEBUG-only asserts at the end check that the decoded bounds of all
-  *  valid lanes enclose the input bounds. */
- static __forceinline void init_dim(const vfloat<N> &lower,
- const vfloat<N> &upper,
- T lower_quant[N],
- T upper_quant[N],
- float &start,
- float &scale)
- {
- /* quantize bounds: map [minF,maxF] (slightly enlarged) onto [MIN_QUAN,MAX_QUAN],
-    rounding lower bounds down and upper bounds up */
- const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
- const float minF = reduce_min(lower);
- const float maxF = reduce_max(upper);
- float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
- float decode_scale = diff / float(MAX_QUAN);
- if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
- assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
- const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
- vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
- vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
-
- /* lower/upper correction: if the decoded value still lies inside the
-    input bound, move the quantized value one step outwards (clamped) */
- vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
- vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
- ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
- iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
-
- /* disable invalid lanes: same lower > upper encoding that clear() writes */
- ilower = select(m_valid,ilower,MAX_QUAN);
- iupper = select(m_valid,iupper,MIN_QUAN);
-
- /* store as uchar to memory */
- vint<N>::store(lower_quant,ilower);
- vint<N>::store(upper_quant,iupper);
- start = minF;
- scale = decode_scale;
-
-#if defined(DEBUG)
- vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
- vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
- vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
- vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
- assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
- assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
-#endif
- }
- /*! Quantizes all three dimensions of the given full-precision node into this node. */
- __forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
- {
- init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
- init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
- init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
- }
- /*! Mask of valid children: a child is valid if its quantized lower_x is <= its upper_x. */
- __forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
-
-#if defined(__AVX512F__) // KNL
- __forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
-#endif
- __forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
-
- __forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
-
- __forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
-
- __forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
-
- __forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
-
- __forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
- /*! Loads M quantized values starting at all_planes+offset and converts
-  *  them to float; start and scale are not applied here. */
- template <int M>
- __forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
-
-#if defined(__AVX512F__)
- __forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
- __forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
- __forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
-#endif
- /* the six planes can be addressed by name or through the flat all_planes array */
- union {
- struct {
- T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
- T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
- T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
- T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
- T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
- T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
- };
- T all_planes[6*N];
- };
-
- Vec3f start; //!< per-dimension decode offset written by init_dim
- Vec3f scale; //!< per-dimension decode scale written by init_dim
- /*! Prints start, scale and the six quantized planes of the node. */
- friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
- {
- o << "QuantizedBaseNode { " << embree_endl;
- o << " start " << n.start << embree_endl;
- o << " scale " << n.scale << embree_endl;
- o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
- o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
- o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
- o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
- o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
- o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
-
- };
-
- /*! Quantized node with child references: the children array of BaseNode_t
-  *  combined with the 8-bit child bounds of QuantizedBaseNode_t. */
- template<typename NodeRef, int N>
- struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
- {
-   using BaseNode_t<NodeRef,N>::children;
-   using QuantizedBaseNode_t<N>::lower_x;
-   using QuantizedBaseNode_t<N>::upper_x;
-   using QuantizedBaseNode_t<N>::lower_y;
-   using QuantizedBaseNode_t<N>::upper_y;
-   using QuantizedBaseNode_t<N>::lower_z;
-   using QuantizedBaseNode_t<N>::upper_z;
-   using QuantizedBaseNode_t<N>::start;
-   using QuantizedBaseNode_t<N>::scale;
-   using QuantizedBaseNode_t<N>::init_dim;
-
-   /*! Stores ref as the reference of the i-th child. */
-   __forceinline void setRef(size_t i, const NodeRef& ref)
-   {
-     assert(i < N);
-     children[i] = ref;
-   }
-
-   /*! Functor that allocates a quantized node holding the bounds of n build records. */
-   struct Create2
-   {
-     template<typename BuildRecord>
-     __forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
-     {
-       /* collect the full-precision child bounds in a temporary node */
-       __aligned(64) AABBNode_t<NodeRef,N> fullPrecision;
-       fullPrecision.clear();
-       for (size_t c=0; c<n; c++)
-         fullPrecision.setBounds(c,children[c].bounds());
-
-       /* allocate the quantized node and quantize the collected bounds into it */
-       QuantizedNode_t* const quantized = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
-       quantized->init(fullPrecision);
-
-       /* tag the address with the quantized node type */
-       return (size_t)quantized | NodeRef::tyQuantizedNode;
-     }
-   };
-
-   /*! Functor that writes num child references into the quantized node behind ref. */
-   struct Set2
-   {
-     template<typename BuildRecord>
-     __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
-     {
-       QuantizedNode_t* const quantized = ref.quantizedNode();
-       for (size_t c=0; c<num; c++)
-         quantized->setRef(c,children[c]);
-       return ref;
-     }
-   };
-
-   /*! Marks all children as empty and quantizes the bounds of node into this node. */
-   __forceinline void init(AABBNode_t<NodeRef,N>& node)
-   {
-     for (size_t c=0; c<N; c++)
-       children[c] = NodeRef::emptyNode;
-     init_dim(node);
-   }
-
- };
-
- /*! BVHN Quantized Node with two sets of bounds.
-  *
-  *  Holds two quantized nodes, node0 and node1. The dequantize*() helpers
-  *  blend the dequantized bounds of both with lerp(...,t); bounds() returns
-  *  the union of both. */
- template<int N>
- struct __aligned(8) QuantizedBaseNodeMB_t
- {
- QuantizedBaseNode_t<N> node0; // bounds used as first lerp argument
- QuantizedBaseNode_t<N> node1; // bounds used as second lerp argument
-
- /*! Clears both quantized nodes. */
- __forceinline void clear() {
- node0.clear();
- node1.clear();
- }
-
- /*! Returns the union of the node0 and node1 bounds of the specified child. */
- __forceinline BBox3fa bounds(size_t i) const
- {
- assert(i < N);
- BBox3fa bounds0 = node0.bounds(i);
- BBox3fa bounds1 = node1.bounds(i);
- bounds0.extend(bounds1);
- return bounds0;
- }
-
- /*! Returns extent of the merged bounds of specified child. */
- __forceinline Vec3fa extent(size_t i) const {
- return bounds(i).size();
- }
- /*! Valid children are determined from node0 only. */
- __forceinline vbool<N> validMask() const { return node0.validMask(); }
- /* Bounds of all N children, blended between node0 and node1 with parameter t. */
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
- template<typename T>
- __forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
-
- /* Bounds of the single child i: the scalar bound of child i is taken from
-    each node, converted to a vfloat<M>, and blended with the M-wide t. */
- template<int M>
- __forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
- template<int M>
- __forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
-
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h
deleted file mode 100644
index 0f6d4dac7e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/alloc.h"
-#include "../common/accel.h"
-#include "../common/device.h"
-#include "../common/scene.h"
-#include "../geometry/primitive.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- /*! Node reference stored together with a bounding box. */
- template<typename NodeRef>
- struct BVHNodeRecord
- {
-   /*! Leaves both members uninitialized. */
-   __forceinline BVHNodeRecord() {}
-
-   /*! Takes BBox3fa bounds and converts them to the stored BBox3fx type. */
-   __forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds)
-     : ref(ref),
-       bounds((BBox3fx)bounds)
-   {}
-
-   /*! Takes bounds that already have the stored type. */
-   __forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds)
-     : ref(ref),
-       bounds(bounds)
-   {}
-
-   NodeRef ref;
-   BBox3fx bounds;
- };
-
- /*! Node reference stored together with linear bounds (LBBox3fa). */
- template<typename NodeRef>
- struct BVHNodeRecordMB
- {
-   /*! Leaves both members uninitialized. */
-   __forceinline BVHNodeRecordMB() {}
-
-   __forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds)
-     : ref(ref),
-       lbounds(lbounds)
-   {}
-
-   NodeRef ref;
-   LBBox3fa lbounds;
- };
-
- /*! Node reference stored together with linear bounds and a 1D range dt. */
- template<typename NodeRef>
- struct BVHNodeRecordMB4D
- {
-   /*! Leaves all members uninitialized. */
-   __forceinline BVHNodeRecordMB4D() {}
-
-   __forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt)
-     : ref(ref),
-       lbounds(lbounds),
-       dt(dt)
-   {}
-
-   NodeRef ref;
-   LBBox3fa lbounds;
-   BBox1f dt;
- };
-
- template<typename NodeRef, int N> struct BaseNode_t;
- template<typename NodeRef, int N> struct AABBNode_t;
- template<typename NodeRef, int N> struct AABBNodeMB_t;
- template<typename NodeRef, int N> struct AABBNodeMB4D_t;
- template<typename NodeRef, int N> struct OBBNode_t;
- template<typename NodeRef, int N> struct OBBNodeMB_t;
- template<typename NodeRef, int N> struct QuantizedNode_t;
- template<typename NodeRef, int N> struct QuantizedNodeMB_t;
-
- /*! Pointer that points to a node or a list of primitives.
-  *
-  *  The reference is one size_t. The bits selected by align_mask
-  *  (byteAlignment-1, i.e. the low 4 bits) do not belong to the address:
-  *  they hold the node type or, when the tyLeaf bit is set, tyLeaf plus the
-  *  number of primitive blocks of the leaf. The highest bit can be used as
-  *  a barrier flag. */
- template<int N>
- struct NodeRefPtr
- {
-   //template<int NN> friend class BVHN;
-
-   /*! Number of bytes the nodes and primitives are minimally aligned to.*/
-   static const size_t byteAlignment = 16;
-   static const size_t byteNodeAlignment = 4*N;
-
-   /*! highest address bit is used as barrier for some algorithms;
-    *  the shift is done on an unsigned one, as shifting a signed 1LL
-    *  into the sign bit is not well defined before C++20 */
-   static const size_t barrier_mask = (size_t(1) << (8*sizeof(size_t)-1));
-
-   /*! Masks the bits that store the number of items per leaf. */
-   static const size_t align_mask = byteAlignment-1;
-   static const size_t items_mask = byteAlignment-1;
-
-   /*! different supported node types */
-   static const size_t tyAABBNode = 0;
-   static const size_t tyAABBNodeMB = 1;
-   static const size_t tyAABBNodeMB4D = 6;
-   static const size_t tyOBBNode = 2;
-   static const size_t tyOBBNodeMB = 3;
-   static const size_t tyQuantizedNode = 5;
-   static const size_t tyLeaf = 8;
-
-   /*! Empty node */
-   static const size_t emptyNode = tyLeaf;
-
-   /*! Invalid node, used as marker in traversal */
-   static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
-   static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);
-
-   /*! Maximum number of primitive blocks in a leaf. */
-   static const size_t maxLeafBlocks = items_mask-tyLeaf;
-
-   /*! Default constructor, leaves ptr uninitialized */
-   __forceinline NodeRefPtr () {}
-
-   /*! Construction from integer */
-   __forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}
-
-   /*! Cast to size_t */
-   __forceinline operator size_t() const { return ptr; }
-
-   /*! Sets the barrier bit (only supported on x86-64 and aarch64). */
-   __forceinline void setBarrier() {
-#if defined(__X86_64__) || defined(__aarch64__)
-     assert(!isBarrier());
-     ptr |= barrier_mask;
-#else
-     assert(false);
-#endif
-   }
-
-   /*! Clears the barrier bit (only supported on x86-64 and aarch64). */
-   __forceinline void clearBarrier() {
-#if defined(__X86_64__) || defined(__aarch64__)
-     ptr &= ~barrier_mask;
-#else
-     assert(false);
-#endif
-   }
-
-   /*! Checks if this is a barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
-   __forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }
-
-   /*! checks if this is a leaf; returns the raw tyLeaf bit, i.e. non-zero for leaves */
-   __forceinline size_t isLeaf() const { return ptr & tyLeaf; }
-
-   /*! returns node type (all bits selected by align_mask) */
-   __forceinline int type() const { return ptr & (size_t)align_mask; }
-
-   /*! checks if this is a node */
-   __forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }
-
-   /*! checks if this is a motion blur node */
-   __forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }
-
-   /*! checks if this is a 4D motion blur node */
-   __forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }
-
-   /*! checks if this is a node with unaligned bounding boxes */
-   __forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }
-
-   /*! checks if this is a motion blur node with unaligned bounding boxes */
-   __forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }
-
-   /*! checks if this is a quantized node */
-   __forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }
-
-   /*! Encodes a node; tyAABBNode is 0, so the address is stored untagged */
-   static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
-     assert(!((size_t)node & align_mask));
-     return NodeRefPtr((size_t) node);
-   }
-
-   /*! Encodes a motion blur node */
-   static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
-     assert(!((size_t)node & align_mask));
-     return NodeRefPtr((size_t) node | tyAABBNodeMB);
-   }
-
-   /*! Encodes a 4D motion blur node */
-   static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
-     assert(!((size_t)node & align_mask));
-     return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
-   }
-
-   /*! Encodes an unaligned node */
-   static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
-     return NodeRefPtr((size_t) node | tyOBBNode);
-   }
-
-   /*! Encodes an unaligned motion blur node */
-   static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
-     return NodeRefPtr((size_t) node | tyOBBNodeMB);
-   }
-
-   /*! Encodes a leaf with num primitive blocks; num is clamped to maxLeafBlocks */
-   static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
-     assert(!((size_t)tri & align_mask));
-     assert(num <= maxLeafBlocks);
-     return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
-   }
-
-   /*! Encodes a leaf that stores the value ty instead of a block count */
-   static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
-     assert(!((size_t)ptr & align_mask));
-     return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
-   }
-
-   /*! returns base node pointer */
-   __forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
-   {
-     assert(!isLeaf());
-     return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
-   }
-   __forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
-   {
-     assert(!isLeaf());
-     return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
-   }
-
-   /*! returns node pointer; no masking, the asserted type tyAABBNode is 0 */
-   __forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; }
-   __forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }
-
-   /*! returns motion blur node pointer (also accepts 4D motion blur nodes) */
-   __forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-   __forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
-   /*! returns 4D motion blur node pointer */
-   __forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-   __forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
-   /*! returns unaligned node pointer */
-   __forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-   __forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
-   /*! returns unaligned motion blur node pointer */
-   __forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-   __forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
-
-   /*! returns quantized node pointer */
-   __forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
-   __forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
-
-   /*! returns leaf pointer and writes the stored item count (tag minus tyLeaf) to num */
-   __forceinline char* leaf(size_t& num) const {
-     assert(isLeaf());
-     num = (ptr & (size_t)items_mask)-tyLeaf;
-     return (char*)(ptr & ~(size_t)align_mask);
-   }
-
-   /*! clear all bit flags stored in the align_mask bits */
-   __forceinline void clearFlags() {
-     ptr &= ~(size_t)align_mask;
-   }
-
-   /*! returns the wideness */
-   __forceinline size_t getN() const { return N; }
-
- public:
-   size_t ptr;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp
deleted file mode 100644
index a273c21e8b..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_refit.h"
-#include "bvh_statistics.h"
-
-#include "../geometry/linei.h"
-#include "../geometry/triangle.h"
-#include "../geometry/trianglev.h"
-#include "../geometry/trianglei.h"
-#include "../geometry/quadv.h"
-#include "../geometry/object.h"
-#include "../geometry/instance.h"
-
-namespace embree
-{
- namespace isa
- {
- static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
-
- /*! Orders two node references by the first sizeof(size_t) bytes of the
-  *  lower_x bounds of the AABB nodes they point to.
-  *
-  *  Both references have to be AABB nodes (asserted by getAABBNode()). */
- template<int N>
- __forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
- {
-   /* the node reference type offers getAABBNode(); there is no node()
-      accessor, so the previous spelling could not be instantiated */
-   size_t sa = *(size_t*)&a->getAABBNode()->lower_x;
-   size_t sb = *(size_t*)&b->getAABBNode()->lower_x;
-   return sa < sb;
- }
-
- /*! Binds the refitter to a BVH and to the interface that computes leaf bounds. */
- template<int N>
- BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
-   : bvh(bvh),
-     leafBounds(leafBounds),
-     numSubTrees(0)
- {}
-
- /*! Recomputes the node bounds of the BVH and stores the result in bvh->bounds. */
- template<int N>
- void BVHNRefitter<N>::refit()
- {
-   /* small BVHs are refitted with one recursion on the calling thread */
-   if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD)
-   {
-     bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
-     return;
-   }
-
-   /* step 1: collect the subtree roots at the extraction depth */
-   BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
-   numSubTrees = 0;
-   gather_subtree_refs(bvh->root,numSubTrees,0);
-
-   /* step 2: refit the collected subtrees in parallel */
-   if (numSubTrees)
-   {
-     parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
-       for (size_t s=r.begin(); s<r.end(); s++)
-         subTreeBounds[s] = recurse_bottom(subTrees[s]);
-     });
-   }
-
-   /* step 3: refit the nodes above the subtrees, consuming the bounds from step 2 */
-   numSubTrees = 0;
-   bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
- }
-
- /*! Appends every reference found at depth MAX_SUB_TREE_EXTRACTION_DEPTH to
-  *  subTrees. Only AABB nodes are descended into; empty children are skipped. */
- template<int N>
- void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
-                                           size_t &subtrees,
-                                           const size_t depth)
- {
-   /* extraction depth reached: record this reference as a subtree root */
-   if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
-   {
-     assert(subtrees < MAX_NUM_SUB_TREES);
-     subTrees[subtrees++] = ref;
-     return;
-   }
-
-   /* anything that is not an AABB node ends the descent without being recorded */
-   if (!ref.isAABBNode())
-     return;
-
-   AABBNode* const node = ref.getAABBNode();
-   for (size_t c=0; c<N; c++)
-   {
-     NodeRef& child = node->child(c);
-     if (unlikely(child == BVH::emptyNode)) continue;
-     gather_subtree_refs(child,subtrees,depth+1);
-   }
- }
-
- /*! Refits the nodes above the extracted subtrees. References at the
-  *  extraction depth take their bounds from subTreeBounds, in the order in
-  *  which gather_subtree_refs recorded them. */
- template<int N>
- BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
-                                         size_t &subtrees,
-                                         const BBox3fa *const subTreeBounds,
-                                         const size_t depth)
- {
-   /* subtree root: its bounds were already computed */
-   if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
-   {
-     assert(subtrees < MAX_NUM_SUB_TREES);
-     assert(subTrees[subtrees] == ref);
-     return subTreeBounds[subtrees++];
-   }
-
-   /* everything that is not an AABB node is passed to the leaf bounds interface */
-   if (!ref.isAABBNode())
-     return leafBounds.leafBounds(ref);
-
-   AABBNode* const node = ref.getAABBNode();
-   BBox3fa childBounds[N];
-
-   for (size_t c=0; c<N; c++)
-   {
-     NodeRef& child = node->child(c);
-
-     if (unlikely(child == BVH::emptyNode)) {
-       childBounds[c] = BBox3fa(empty);
-     } else {
-       childBounds[c] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
-     }
-   }
-
-   /* AOS to SOA transform */
-   BBox3vf<N> soaBounds = transpose<N>(childBounds);
-
-   /* write the new child bounds into the node */
-   node->lower_x = soaBounds.lower.x;
-   node->lower_y = soaBounds.lower.y;
-   node->lower_z = soaBounds.lower.z;
-   node->upper_x = soaBounds.upper.x;
-   node->upper_y = soaBounds.upper.y;
-   node->upper_z = soaBounds.upper.z;
-
-   return merge<N>(childBounds);
- }
-
- // =========================================================
- // =========================================================
- // =========================================================
-
-
- /*! Refits the subtree below ref recursively and returns its merged bounds. */
- template<int N>
- BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
- {
-   /* leaves are handled by the leaf bounds interface */
-   if (unlikely(ref.isLeaf()))
-     return leafBounds.leafBounds(ref);
-
-   /* otherwise this is an internal node */
-   AABBNode* const node = ref.getAABBNode();
-
-   /* enable exclusive prefetch for >= AVX platforms */
-#if defined(__AVX__)
-   BVH::prefetchW(ref);
-#endif
-
-   /* collect the bounds of all children, empty slots get empty bounds */
-   BBox3fa childBounds[N];
-   for (size_t c=0; c<N; c++)
-   {
-     if (unlikely(node->child(c) == BVH::emptyNode)) {
-       childBounds[c] = BBox3fa(empty);
-     } else {
-       childBounds[c] = recurse_bottom(node->child(c));
-     }
-   }
-
-   /* AOS to SOA transform */
-   BBox3vf<N> soaBounds = transpose<N>(childBounds);
-
-   /* write the new child bounds into the node */
-   node->lower_x = soaBounds.lower.x;
-   node->lower_y = soaBounds.lower.y;
-   node->lower_z = soaBounds.lower.z;
-   node->upper_x = soaBounds.upper.x;
-   node->upper_y = soaBounds.upper.y;
-   node->upper_z = soaBounds.upper.z;
-
-   return merge<N>(childBounds);
- }
-
- /*! Stores bvh, builder and mesh and creates the refitter, which gets this
-  *  object as its leaf bounds interface. The mode argument is not used here. */
- template<int N, typename Mesh, typename Primitive>
- BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
-   : bvh(bvh),
-     builder(builder),
-     refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)),
-     mesh(mesh),
-     topologyVersion(0)
- {}
-
- /*! Clears the wrapped builder, if there is one. */
- template<int N, typename Mesh, typename Primitive>
- void BVHNRefitT<N,Mesh,Primitive>::clear()
- {
-   if (!builder)
-     return;
-   builder->clear();
- }
-
- /*! Refits the BVH when the mesh topology is unchanged; otherwise records
-  *  the new topology version and runs the wrapped builder. */
- template<int N, typename Mesh, typename Primitive>
- void BVHNRefitT<N,Mesh,Primitive>::build()
- {
-   if (!mesh->topologyChanged(topologyVersion))
-   {
-     refitter->refit();
-     return;
-   }
-
-   topologyVersion = mesh->getTopologyVersion();
-   builder->build();
- }
-
- template class BVHNRefitter<4>; // explicit instantiation for N=4
-#if defined(__AVX__)
- template class BVHNRefitter<8>; // N=8 only when compiled with AVX
-#endif
- /* Triangle meshes: each *RefitSAH factory wraps the matching SAH builder
-    (only declared here) in a BVHNRefitT for the given primitive type. */
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
-
- Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#if defined(__AVX__)
- Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
-
- Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
- Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-#endif
- /* Quad meshes: same pattern with Quad4v primitives. */
-#if defined(EMBREE_GEOMETRY_QUAD)
- Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-
-#endif
- /* User geometries: same pattern with Object primitives. */
-#if defined(EMBREE_GEOMETRY_USER)
- Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
- Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
- Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
-#endif
-#endif
- /* Instances: same pattern with InstancePrimitive; these factories take an additional gtype mask. */
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
- Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
-
-#if defined(__AVX__)
- Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
- Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
-#endif
-#endif
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h
deleted file mode 100644
index 4aa9bdd7cc..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNRefitter
- {
- public:
- /* BVHNRefitter recomputes the node bounds of an existing BVHN<N>; the
-    bounds of the leaves are obtained through LeafBoundsInterface. */
- /*! Type shortcuts */
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::NodeRef NodeRef;
- /*! Callback interface that returns the bounds of the leaf referenced by ref. */
- struct LeafBoundsInterface {
- virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
- };
-
- public:
-
- /*! Constructor. Stores bvh and keeps a reference to leafBounds, which
-  *  therefore has to stay alive as long as the refitter is used. */
- BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);
-
- /*! refits the BVH */
- void refit();
-
- private:
- /* single-threaded subtree extraction based on BVH depth;
-    subtrees counts the references stored in subTrees so far */
- void gather_subtree_refs(NodeRef& ref,
- size_t &subtrees,
- const size_t depth = 0);
-
- /* single-threaded top-level refit; subTreeBounds holds one box per
-    extracted subtree, subtrees is the index of the next box to consume */
- BBox3fa refit_toplevel(NodeRef& ref,
- size_t &subtrees,
- const BBox3fa *const subTreeBounds,
- const size_t depth = 0);
-
- /* single-threaded subtree refit */
- BBox3fa recurse_bottom(NodeRef& ref);
-
- public:
- BVH* bvh; //!< BVH to refit
- const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves
-
- static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3; // depth at which subtree roots are collected
- static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
- size_t numSubTrees; // number of valid entries in subTrees
- NodeRef subTrees[MAX_NUM_SUB_TREES]; // subtree roots collected by gather_subtree_refs
- };
-
- /*! Builder that owns a wrapped builder and a BVHNRefitter for the same BVH,
-  *  and serves as the LeafBoundsInterface of that refitter. */
- template<int N, typename Mesh, typename Primitive>
- class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
- {
- public:
-
-   /*! Type shortcuts */
-   typedef BVHN<N> BVH;
-   typedef typename BVH::AABBNode AABBNode;
-   typedef typename BVH::NodeRef NodeRef;
-
- public:
-   BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);
-
-   virtual void build();
-
-   virtual void clear();
-
-   /*! Updates every primitive of the leaf from the mesh and returns the
-    *  union of the bounds reported by the updates; empty for the empty node. */
-   virtual const BBox3fa leafBounds (NodeRef& ref) const
-   {
-     size_t numPrims;
-     char* const leafPtr = ref.leaf(numPrims);
-     if (unlikely(ref == BVH::emptyNode)) return empty;
-
-     Primitive* const prims = (Primitive*)leafPtr;
-     BBox3fa merged = empty;
-     for (size_t p=0; p<numPrims; p++)
-       merged.extend(prims[p].update(mesh));
-     return merged;
-   }
-
- private:
-   BVH* bvh;
-   std::unique_ptr<Builder> builder;
-   std::unique_ptr<BVHNRefitter<N>> refitter;
-   Mesh* mesh;
-   unsigned int topologyVersion;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp
deleted file mode 100644
index 2bb431bf0e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_rotate.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Computes half surface area of box. */
- __forceinline float halfArea3f(const BBox<vfloat4>& box) {
- const vfloat4 d = box.size();
- const vfloat4 a = d*shuffle<1,2,0,3>(d);
- return a[0]+a[1]+a[2];
- }
-
- size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
- {
- /*! nothing to rotate if we reached a leaf node. */
- if (parentRef.isBarrier()) return 0;
- if (parentRef.isLeaf()) return 0;
- AABBNode* parent = parentRef.getAABBNode();
-
- /*! rotate all children first */
- vint4 cdepth;
- for (size_t c=0; c<4; c++)
- cdepth[c] = (int)rotate(parent->child(c),depth+1);
-
- /* compute current areas of all children */
- vfloat4 sizeX = parent->upper_x-parent->lower_x;
- vfloat4 sizeY = parent->upper_y-parent->lower_y;
- vfloat4 sizeZ = parent->upper_z-parent->lower_z;
- vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
-
- /*! get node bounds */
- BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
- parent->bounds(child1_0,child1_1,child1_2,child1_3);
-
- /*! Find best rotation. We pick a first child (child1) and a sub-child
- (child2child) of a different second child (child2), and swap child1
- and child2child. We perform the best such swap. */
- float bestArea = 0;
- size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
- for (size_t c2=0; c2<4; c2++)
- {
- /*! ignore leaf nodes as we cannot descent into them */
- if (parent->child(c2).isBarrier()) continue;
- if (parent->child(c2).isLeaf()) continue;
- AABBNode* child2 = parent->child(c2).getAABBNode();
-
- /*! transpose child bounds */
- BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
- child2->bounds(child2c0,child2c1,child2c2,child2c3);
-
- /*! put child1_0 at each child2 position */
- float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
- float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
- float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
- float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
- vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
- vfloat4 min0 = vreduce_min(cost0);
- int pos0 = (int)bsf(movemask(min0 == cost0));
-
- /*! put child1_1 at each child2 position */
- float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
- float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
- float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
- float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
- vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
- vfloat4 min1 = vreduce_min(cost1);
- int pos1 = (int)bsf(movemask(min1 == cost1));
-
- /*! put child1_2 at each child2 position */
- float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
- float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
- float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
- float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
- vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
- vfloat4 min2 = vreduce_min(cost2);
- int pos2 = (int)bsf(movemask(min2 == cost2));
-
- /*! put child1_3 at each child2 position */
- float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
- float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
- float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
- float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
- vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
- vfloat4 min3 = vreduce_min(cost3);
- int pos3 = (int)bsf(movemask(min3 == cost3));
-
- /*! find best other child */
- vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
- int pos[4] = { pos0,pos1,pos2,pos3 };
- const size_t mbd = BVH4::maxBuildDepth;
- vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
- valid &= vint4(int(c2)) != vint4(step);
- if (none(valid)) continue;
- size_t c1 = select_min(valid,area0123);
- float area = area0123[c1];
- if (c1 == c2) continue; // can happen if bounds are NANs
-
- /*! accept a swap when it reduces cost and is not swapping a node with itself */
- if (area < bestArea) {
- bestArea = area;
- bestChild1 = c1;
- bestChild2 = c2;
- bestChild2Child = pos[c1];
- }
- }
-
- /*! if we did not find a swap that improves the SAH then do nothing */
- if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
-
- /*! perform the best found tree rotation */
- AABBNode* child2 = parent->child(bestChild2).getAABBNode();
- AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
- parent->setBounds(bestChild2,child2->bounds());
- AABBNode::compact(parent);
- AABBNode::compact(child2);
-
- /*! This returned depth is conservative as the child that was
- * pulled up in the tree could have been on the critical path. */
- cdepth[bestChild1]++; // bestChild1 was pushed down one level
- return 1+reduce_max(cdepth);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h
deleted file mode 100644
index 009bef339e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N>
- class BVHNRotate
- {
- typedef typename BVHN<N>::NodeRef NodeRef;
-
- public:
- static const bool enabled = false;
-
- static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; }
- static __forceinline void restructure(NodeRef ref, size_t depth = 1) {}
- };
-
- /* BVH4 tree rotations */
- template<>
- class BVHNRotate<4>
- {
- typedef BVH4::AABBNode AABBNode;
- typedef BVH4::NodeRef NodeRef;
-
- public:
- static const bool enabled = true;
-
- static size_t rotate(NodeRef parentRef, size_t depth = 1);
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp
deleted file mode 100644
index aa56035026..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "bvh_statistics.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- template<int N>
- BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
- {
- double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
- stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f));
- }
-
- template<int N>
- std::string BVHNStatistics<N>::str()
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << " primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl;
- size_t totalBytes = stat.bytes(bvh);
- double totalSAH = stat.sah(bvh);
- stream << " total : sah = " << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), ";
- stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl;
- if (stat.statAABBNodes.numNodes ) stream << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statOBBNodes.numNodes ) stream << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statAABBNodesMB.numNodes ) stream << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statAABBNodesMB4D.numNodes) stream << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statOBBNodesMB.numNodes) stream << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (stat.statQuantizedNodes.numNodes ) stream << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (true) stream << " leaves : " << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl;
- if (true) stream << " histogram : " << stat.statLeaf.histToString() << std::endl;
- return stream.str();
- }
-
- template<int N>
- typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
- {
- Statistics s;
- assert(t0t1.size() > 0.0f);
- double dt = max(0.0f,t0t1.size());
- if (node.isAABBNode())
- {
- AABBNode* n = node.getAABBNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extend(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statAABBNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodes.numNodes++;
- s.statAABBNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isOBBNode())
- {
- OBBNode* n = node.ungetAABBNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statOBBNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statOBBNodes.numNodes++;
- s.statOBBNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isAABBNodeMB())
- {
- AABBNodeMB* n = node.getAABBNodeMB();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statAABBNodesMB.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodesMB.numNodes++;
- s.statAABBNodesMB.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isAABBNodeMB4D())
- {
- AABBNodeMB4D* n = node.getAABBNodeMB4D();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
- assert(!t0t1i.empty());
- const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
- Statistics s = statistics(n->child(i),Ai,t0t1i);
- s.statAABBNodesMB4D.numChildren++;
- return s;
- }, Statistics::add);
- s.statAABBNodesMB4D.numNodes++;
- s.statAABBNodesMB4D.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isOBBNodeMB())
- {
- OBBNodeMB* n = node.ungetAABBNodeMB();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent0(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statOBBNodesMB.numChildren++;
- return s;
- }, Statistics::add);
- s.statOBBNodesMB.numNodes++;
- s.statOBBNodesMB.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isQuantizedNode())
- {
- QuantizedNode* n = node.quantizedNode();
- s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
- if (n->child(i) == BVH::emptyNode) return Statistics();
- const double Ai = max(0.0f,halfArea(n->extent(i)));
- Statistics s = statistics(n->child(i),Ai,t0t1);
- s.statQuantizedNodes.numChildren++;
- return s;
- }, Statistics::add);
- s.statQuantizedNodes.numNodes++;
- s.statQuantizedNodes.nodeSAH += dt*A;
- s.depth++;
- }
- else if (node.isLeaf())
- {
- size_t num; const char* tri = node.leaf(num);
- if (num)
- {
- for (size_t i=0; i<num; i++)
- {
- const size_t bytes = bvh->primTy->getBytes(tri);
- s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
- s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
- s.statLeaf.numBytes += bytes;
- tri+=bytes;
- }
- s.statLeaf.numLeaves++;
- s.statLeaf.numPrimBlocks += num;
- s.statLeaf.leafSAH += dt*A*num;
- if (num-1 < Statistics::LeafStat::NHIST) {
- s.statLeaf.numPrimBlocksHistogram[num-1]++;
- }
- }
- }
- else {
- // -- GODOT start --
- // throw std::runtime_error("not supported node type in bvh_statistics");
- abort();
- // -- GODOT end --
- }
- return s;
- }
-
-#if defined(__AVX__)
- template class BVHNStatistics<8>;
-#endif
-
-#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
- template class BVHNStatistics<4>;
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h
deleted file mode 100644
index 73dfc6fbcc..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h
+++ /dev/null
@@ -1,285 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include <sstream>
-
-namespace embree
-{
- template<int N>
- class BVHNStatistics
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::AABBNode AABBNode;
- typedef typename BVH::OBBNode OBBNode;
- typedef typename BVH::AABBNodeMB AABBNodeMB;
- typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
- typedef typename BVH::OBBNodeMB OBBNodeMB;
- typedef typename BVH::QuantizedNode QuantizedNode;
-
- typedef typename BVH::NodeRef NodeRef;
-
- struct Statistics
- {
- template<typename Node>
- struct NodeStat
- {
- NodeStat ( double nodeSAH = 0,
- size_t numNodes = 0,
- size_t numChildren = 0)
- : nodeSAH(nodeSAH),
- numNodes(numNodes),
- numChildren(numChildren) {}
-
- double sah(BVH* bvh) const {
- return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
- }
-
- size_t bytes() const {
- return numNodes*sizeof(Node);
- }
-
- size_t size() const {
- return numNodes;
- }
-
- double fillRateNom () const { return double(numChildren); }
- double fillRateDen () const { return double(numNodes*N); }
- double fillRate () const { return fillRateNom()/fillRateDen(); }
-
- __forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
- {
- return NodeStat(a.nodeSAH + b.nodeSAH,
- a.numNodes+b.numNodes,
- a.numChildren+b.numChildren);
- }
-
- std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
- stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB ";
- stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), ";
- stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives);
- return stream.str();
- }
-
- public:
- double nodeSAH;
- size_t numNodes;
- size_t numChildren;
- };
-
- struct LeafStat
- {
- static const int NHIST = 8;
-
- LeafStat ( double leafSAH = 0.0f,
- size_t numLeaves = 0,
- size_t numPrimsActive = 0,
- size_t numPrimsTotal = 0,
- size_t numPrimBlocks = 0,
- size_t numBytes = 0)
- : leafSAH(leafSAH),
- numLeaves(numLeaves),
- numPrimsActive(numPrimsActive),
- numPrimsTotal(numPrimsTotal),
- numPrimBlocks(numPrimBlocks),
- numBytes(numBytes)
- {
- for (size_t i=0; i<NHIST; i++)
- numPrimBlocksHistogram[i] = 0;
- }
-
- double sah(BVH* bvh) const {
- return leafSAH/bvh->getLinearBounds().expectedHalfArea();
- }
-
- size_t bytes(BVH* bvh) const {
- return numBytes;
- }
-
- size_t size() const {
- return numLeaves;
- }
-
- double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
- double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
- double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }
-
- __forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
- {
- LeafStat stat(a.leafSAH + b.leafSAH,
- a.numLeaves+b.numLeaves,
- a.numPrimsActive+b.numPrimsActive,
- a.numPrimsTotal+b.numPrimsTotal,
- a.numPrimBlocks+b.numPrimBlocks,
- a.numBytes+b.numBytes);
- for (size_t i=0; i<NHIST; i++) {
- stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
- stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
- }
- return stat;
- }
-
- std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
- stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
- stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
- stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
- stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
- stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
- return stream.str();
- }
-
- std::string histToString() const
- {
- std::ostringstream stream;
- stream.setf(std::ios::fixed, std::ios::floatfield);
- for (size_t i=0; i<NHIST; i++)
- stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
- return stream.str();
- }
-
- public:
- double leafSAH; //!< SAH of the leaves only
- size_t numLeaves; //!< Number of leaf nodes.
- size_t numPrimsActive; //!< Number of active primitives (
- size_t numPrimsTotal; //!< Number of active and inactive primitives
- size_t numPrimBlocks; //!< Number of primitive blocks.
- size_t numBytes; //!< Number of bytes of leaves.
- size_t numPrimBlocksHistogram[8];
- };
-
- public:
- Statistics (size_t depth = 0,
- LeafStat statLeaf = LeafStat(),
- NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
- NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
- NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
- NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
- NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
- NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
-
- : depth(depth),
- statLeaf(statLeaf),
- statAABBNodes(statAABBNodes),
- statOBBNodes(statOBBNodes),
- statAABBNodesMB(statAABBNodesMB),
- statAABBNodesMB4D(statAABBNodesMB4D),
- statOBBNodesMB(statOBBNodesMB),
- statQuantizedNodes(statQuantizedNodes) {}
-
- double sah(BVH* bvh) const
- {
- return statLeaf.sah(bvh) +
- statAABBNodes.sah(bvh) +
- statOBBNodes.sah(bvh) +
- statAABBNodesMB.sah(bvh) +
- statAABBNodesMB4D.sah(bvh) +
- statOBBNodesMB.sah(bvh) +
- statQuantizedNodes.sah(bvh);
- }
-
- size_t bytes(BVH* bvh) const {
- return statLeaf.bytes(bvh) +
- statAABBNodes.bytes() +
- statOBBNodes.bytes() +
- statAABBNodesMB.bytes() +
- statAABBNodesMB4D.bytes() +
- statOBBNodesMB.bytes() +
- statQuantizedNodes.bytes();
- }
-
- size_t size() const
- {
- return statLeaf.size() +
- statAABBNodes.size() +
- statOBBNodes.size() +
- statAABBNodesMB.size() +
- statAABBNodesMB4D.size() +
- statOBBNodesMB.size() +
- statQuantizedNodes.size();
- }
-
- double fillRate (BVH* bvh) const
- {
- double nom = statLeaf.fillRateNom(bvh) +
- statAABBNodes.fillRateNom() +
- statOBBNodes.fillRateNom() +
- statAABBNodesMB.fillRateNom() +
- statAABBNodesMB4D.fillRateNom() +
- statOBBNodesMB.fillRateNom() +
- statQuantizedNodes.fillRateNom();
- double den = statLeaf.fillRateDen(bvh) +
- statAABBNodes.fillRateDen() +
- statOBBNodes.fillRateDen() +
- statAABBNodesMB.fillRateDen() +
- statAABBNodesMB4D.fillRateDen() +
- statOBBNodesMB.fillRateDen() +
- statQuantizedNodes.fillRateDen();
- return nom/den;
- }
-
- friend Statistics operator+ ( const Statistics& a, const Statistics& b )
- {
- return Statistics(max(a.depth,b.depth),
- a.statLeaf + b.statLeaf,
- a.statAABBNodes + b.statAABBNodes,
- a.statOBBNodes + b.statOBBNodes,
- a.statAABBNodesMB + b.statAABBNodesMB,
- a.statAABBNodesMB4D + b.statAABBNodesMB4D,
- a.statOBBNodesMB + b.statOBBNodesMB,
- a.statQuantizedNodes + b.statQuantizedNodes);
- }
-
- static Statistics add ( const Statistics& a, const Statistics& b ) {
- return a+b;
- }
-
- public:
- size_t depth;
- LeafStat statLeaf;
- NodeStat<AABBNode> statAABBNodes;
- NodeStat<OBBNode> statOBBNodes;
- NodeStat<AABBNodeMB> statAABBNodesMB;
- NodeStat<AABBNodeMB4D> statAABBNodesMB4D;
- NodeStat<OBBNodeMB> statOBBNodesMB;
- NodeStat<QuantizedNode> statQuantizedNodes;
- };
-
- public:
-
- /* Constructor gathers statistics. */
- BVHNStatistics (BVH* bvh);
-
- /*! Convert statistics into a string */
- std::string str();
-
- double sah() const {
- return stat.sah(bvh);
- }
-
- size_t bytesUsed() const {
- return stat.bytes(bvh);
- }
-
- private:
- Statistics statistics(NodeRef node, const double A, const BBox1f dt);
-
- private:
- BVH* bvh;
- Statistics stat;
- };
-
- typedef BVHNStatistics<4> BVH4Statistics;
- typedef BVHNStatistics<8> BVH8Statistics;
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h
deleted file mode 100644
index 7f17084b81..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h
+++ /dev/null
@@ -1,676 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "node_intersector1.h"
-#include "../common/stack_item.h"
-
-#define NEW_SORTING_CODE 1
-
-namespace embree
-{
- namespace isa
- {
- /*! BVH regular node traversal for single rays. */
- template<int N, int Nx, int types>
- class BVHNNodeTraverser1Hit;
-
- /*! Helper functions for fast sorting using AVX512 instructions. */
-#if defined(__AVX512ER__)
-
- /* KNL code path */
- __forceinline void isort_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p)
- {
- const vfloat16 dist_shift = align_shift_right<15>(dist,dist);
- const vllong8 ptr_shift = align_shift_right<7>(ptr,ptr);
- const vbool16 m_geq = d >= dist;
- const vbool16 m_geq_shift = m_geq << 1;
- dist = select(m_geq,d,dist);
- ptr = select(vboold8(m_geq),p,ptr);
- dist = select(m_geq_shift,dist_shift,dist);
- ptr = select(vboold8(m_geq_shift),ptr_shift,ptr);
- }
-
- __forceinline void isort_quick_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p)
- {
- //dist = align_shift_right<15>(dist,d);
- //ptr = align_shift_right<7>(ptr,p);
- dist = align_shift_right<15>(dist,permute(d,vint16(zero)));
- ptr = align_shift_right<7>(ptr,permute(p,vllong8(zero)));
- }
-
- template<int N, int Nx, int types, class NodeRef, class BaseNode>
- __forceinline void traverseClosestHitAVX512(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- vllong8 children( vllong<N>::loadu((void*)node->children) );
- children = vllong8::compact((int)mask,children);
- vfloat16 distance = tNear;
- distance = vfloat16::compact((int)mask,distance,tNear);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- mask &= mask-1;
- if (likely(mask == 0)) return;
-
- /* 2 hits: order A0 B0 */
- const vllong8 c0(children);
- const vfloat16 d0(distance);
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
- const vllong8 c1(children);
- const vfloat16 d1(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- /* a '<' keeps the order for equal distances, scenes like powerplant largely benefit from it */
- const vboolf16 m_dist = d0 < d1;
- const vfloat16 dist_A0 = select(m_dist, d0, d1);
- const vfloat16 dist_B0 = select(m_dist, d1, d0);
- const vllong8 ptr_A0 = select(vboold8(m_dist), c0, c1);
- const vllong8 ptr_B0 = select(vboold8(m_dist), c1, c0);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A0);
- stackPtr[0].ptr = toScalar(ptr_B0);
- *(float*)&stackPtr[0].dist = toScalar(dist_B0);
- stackPtr++;
- return;
- }
-
- /* 3 hits: order A1 B1 C1 */
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- const vllong8 c2(children);
- const vfloat16 d2(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vboolf16 m_dist1 = dist_A0 <= d2;
- const vfloat16 dist_tmp_B1 = select(m_dist1, d2, dist_A0);
- const vllong8 ptr_A1 = select(vboold8(m_dist1), ptr_A0, c2);
- const vllong8 ptr_tmp_B1 = select(vboold8(m_dist1), c2, ptr_A0);
-
- const vboolf16 m_dist2 = dist_B0 <= dist_tmp_B1;
- const vfloat16 dist_B1 = select(m_dist2, dist_B0 , dist_tmp_B1);
- const vfloat16 dist_C1 = select(m_dist2, dist_tmp_B1, dist_B0);
- const vllong8 ptr_B1 = select(vboold8(m_dist2), ptr_B0, ptr_tmp_B1);
- const vllong8 ptr_C1 = select(vboold8(m_dist2), ptr_tmp_B1, ptr_B0);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A1);
- stackPtr[0].ptr = toScalar(ptr_C1);
- *(float*)&stackPtr[0].dist = toScalar(dist_C1);
- stackPtr[1].ptr = toScalar(ptr_B1);
- *(float*)&stackPtr[1].dist = toScalar(dist_B1);
- stackPtr+=2;
- return;
- }
-
- /* 4 hits: order A2 B2 C2 D2 */
-
- const vfloat16 dist_A1 = select(m_dist1, dist_A0, d2);
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- const vllong8 c3(children);
- const vfloat16 d3(distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vboolf16 m_dist3 = dist_A1 <= d3;
- const vfloat16 dist_tmp_B2 = select(m_dist3, d3, dist_A1);
- const vllong8 ptr_A2 = select(vboold8(m_dist3), ptr_A1, c3);
- const vllong8 ptr_tmp_B2 = select(vboold8(m_dist3), c3, ptr_A1);
-
- const vboolf16 m_dist4 = dist_B1 <= dist_tmp_B2;
- const vfloat16 dist_B2 = select(m_dist4, dist_B1 , dist_tmp_B2);
- const vfloat16 dist_tmp_C2 = select(m_dist4, dist_tmp_B2, dist_B1);
- const vllong8 ptr_B2 = select(vboold8(m_dist4), ptr_B1, ptr_tmp_B2);
- const vllong8 ptr_tmp_C2 = select(vboold8(m_dist4), ptr_tmp_B2, ptr_B1);
-
- const vboolf16 m_dist5 = dist_C1 <= dist_tmp_C2;
- const vfloat16 dist_C2 = select(m_dist5, dist_C1 , dist_tmp_C2);
- const vfloat16 dist_D2 = select(m_dist5, dist_tmp_C2, dist_C1);
- const vllong8 ptr_C2 = select(vboold8(m_dist5), ptr_C1, ptr_tmp_C2);
- const vllong8 ptr_D2 = select(vboold8(m_dist5), ptr_tmp_C2, ptr_C1);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = toScalar(ptr_A2);
- stackPtr[0].ptr = toScalar(ptr_D2);
- *(float*)&stackPtr[0].dist = toScalar(dist_D2);
- stackPtr[1].ptr = toScalar(ptr_C2);
- *(float*)&stackPtr[1].dist = toScalar(dist_C2);
- stackPtr[2].ptr = toScalar(ptr_B2);
- *(float*)&stackPtr[2].dist = toScalar(dist_B2);
- stackPtr+=3;
- return;
- }
-
- /* >=5 hits: reverse to descending order for writing to stack */
-
- const size_t hits = 4 + popcnt(mask);
- const vfloat16 dist_A2 = select(m_dist3, dist_A1, d3);
- vfloat16 dist(neg_inf);
- vllong8 ptr(zero);
-
-
- isort_quick_update(dist,ptr,dist_A2,ptr_A2);
- isort_quick_update(dist,ptr,dist_B2,ptr_B2);
- isort_quick_update(dist,ptr,dist_C2,ptr_C2);
- isort_quick_update(dist,ptr,dist_D2,ptr_D2);
-
- do {
-
- children = align_shift_right<1>(children,children);
- distance = align_shift_right<1>(distance,distance);
-
- cur = toScalar(children);
- BVHN<N>::prefetch(cur,types);
-
- const vfloat16 new_dist(permute(distance,vint16(zero)));
- const vllong8 new_ptr(permute(children,vllong8(zero)));
-
- mask &= mask-1;
- isort_update(dist,ptr,new_dist,new_ptr);
-
- } while(mask);
-
- const vboold8 m_stack_ptr(0x55); // 10101010 (lsb -> msb)
- const vboolf16 m_stack_dist(0x4444); // 0010001000100010 (lsb -> msb)
-
- /* extract current noderef */
- cur = toScalar(permute(ptr,vllong8(hits-1)));
- /* rearrange pointers to beginning of 16 bytes block */
- vllong8 stackElementA0;
- stackElementA0 = vllong8::expand(m_stack_ptr,ptr,stackElementA0);
- /* put distances in between */
- vuint16 stackElementA1((__m512i)stackElementA0);
- stackElementA1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementA1);
- /* write out first 4 x 16 bytes block to stack */
- vuint16::storeu(stackPtr,stackElementA1);
- /* get upper half of dist and ptr */
- dist = align_shift_right<4>(dist,dist);
- ptr = align_shift_right<4>(ptr,ptr);
- /* assemble and write out second block */
- vllong8 stackElementB0;
- stackElementB0 = vllong8::expand(m_stack_ptr,ptr,stackElementB0);
- vuint16 stackElementB1((__m512i)stackElementB0);
- stackElementB1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementB1);
- vuint16::storeu(stackPtr + 4,stackElementB1);
- /* increase stack pointer */
- stackPtr += hits-1;
- }
-#endif
-
-#if defined(__AVX512VL__) // SKX
-
- template<int N>
- __forceinline void isort_update(vint<N> &dist, const vint<N> &d)
- {
- const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
- const vboolf<N> m_geq = d >= dist;
- const vboolf<N> m_geq_shift = m_geq << 1;
- dist = select(m_geq,d,dist);
- dist = select(m_geq_shift,dist_shift,dist);
- }
-
- template<int N>
- __forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
- dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
- }
-
- __forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
- return toScalar(permutex2var((__m256i)index,n0,n1));
- }
-
- __forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
- return toScalar(permute(n,index));
- }
-
-#endif
-
- /* Specialization for BVH4. */
- template<int Nx, int types>
- class BVHNNodeTraverser1Hit<4, Nx, types>
- {
- typedef BVH4 BVH;
- typedef BVH4::NodeRef NodeRef;
- typedef BVH4::BaseNode BaseNode;
-
-
- public:
- /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
-#if defined(__AVX512ER__)
- traverseClosestHitAVX512<4,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#else
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
- if (likely(mask == 0)) {
- assert(cur != BVH::emptyNode);
- return;
- }
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- const unsigned int d0 = ((unsigned int*)&tNear)[r];
- r = bscf(mask);
- NodeRef c1 = node->child(r);
- BVH::prefetch(c1,types);
- const unsigned int d1 = ((unsigned int*)&tNear)[r];
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- assert(stackPtr < stackEnd);
- if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
- else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
- }
-
-#if NEW_SORTING_CODE == 1
- vint4 s0((size_t)c0,(size_t)d0);
- vint4 s1((size_t)c1,(size_t)d1);
- r = bscf(mask);
- NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
- vint4 s2((size_t)c2,(size_t)d2);
- /* 3 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort3(s0,s1,s2);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
- cur = toSizeT(s2);
- stackPtr+=2;
- return;
- }
- r = bscf(mask);
- NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
- vint4 s3((size_t)c3,(size_t)d3);
- /* 4 hits */
- StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
- cur = toSizeT(s3);
- stackPtr+=3;
-#else
- /*! Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there. */
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
-
- /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
-
- /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#endif
-#endif
- }
-
- /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- NodeRef*& stackPtr,
- NodeRef* stackEnd)
- {
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
-
- /* simpler in sequence traversal order */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r); BVH::prefetch(cur,types);
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
- }
- }
- };
-
- /* Specialization for BVH8. */
- template<int Nx, int types>
- class BVHNNodeTraverser1Hit<8, Nx, types>
- {
- typedef BVH8 BVH;
- typedef BVH8::NodeRef NodeRef;
- typedef BVH8::BaseNode BaseNode;
-
-#if defined(__AVX512VL__)
- template<class NodeRef, class BaseNode>
- static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
- size_t mask,
- const vfloat8& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
- const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
- const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
- vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
- distance_i = vint8::compact((int)mask,distance_i,distance_i);
- cur = permuteExtract(distance_i,n0,n1);
- BVH::prefetch(cur,types);
-
- mask &= mask-1;
- if (likely(mask == 0)) return;
-
- /* 2 hits: order A0 B0 */
- const vint8 d0(distance_i);
- const vint8 d1(shuffle<1>(distance_i));
- cur = permuteExtract(d1,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A0 = min(d0, d1);
- const vint8 dist_B0 = max(d0, d1);
- assert(dist_A0[0] < dist_B0[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A0,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
- stackPtr++;
- return;
- }
-
- /* 3 hits: order A1 B1 C1 */
-
- const vint8 d2(shuffle<2>(distance_i));
- cur = permuteExtract(d2,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A1 = min(dist_A0,d2);
- const vint8 dist_tmp_B1 = max(dist_A0,d2);
- const vint8 dist_B1 = min(dist_B0,dist_tmp_B1);
- const vint8 dist_C1 = max(dist_B0,dist_tmp_B1);
- assert(dist_A1[0] < dist_B1[0]);
- assert(dist_B1[0] < dist_C1[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A1,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
- stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
- *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
- stackPtr+=2;
- return;
- }
-
- /* 4 hits: order A2 B2 C2 D2 */
-
- const vint8 d3(shuffle<3>(distance_i));
- cur = permuteExtract(d3,n0,n1);
- BVH::prefetch(cur,types);
-
- const vint8 dist_A2 = min(dist_A1,d3);
- const vint8 dist_tmp_B2 = max(dist_A1,d3);
- const vint8 dist_B2 = min(dist_B1,dist_tmp_B2);
- const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
- const vint8 dist_C2 = min(dist_C1,dist_tmp_C2);
- const vint8 dist_D2 = max(dist_C1,dist_tmp_C2);
- assert(dist_A2[0] < dist_B2[0]);
- assert(dist_B2[0] < dist_C2[0]);
- assert(dist_C2[0] < dist_D2[0]);
-
- mask &= mask-1;
- if (likely(mask == 0)) {
- cur = permuteExtract(dist_A2,n0,n1);
- stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
- *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
- stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
- *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
- stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
- *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
- stackPtr+=3;
- return;
- }
-
- /* >=5 hits: reverse to descending order for writing to stack */
-
- distance_i = align_shift_right<3>(distance_i,distance_i);
- const size_t hits = 4 + popcnt(mask);
- vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert
-
- isort_quick_update(dist,dist_A2);
- isort_quick_update(dist,dist_B2);
- isort_quick_update(dist,dist_C2);
- isort_quick_update(dist,dist_D2);
-
- do {
-
- distance_i = align_shift_right<1>(distance_i,distance_i);
- cur = permuteExtract(distance_i,n0,n1);
- BVH::prefetch(cur,types);
- const vint8 new_dist(permute(distance_i,vint8(zero)));
- mask &= mask-1;
- isort_update(dist,new_dist);
-
- } while(mask);
-
- for (size_t i=0; i<7; i++)
- assert(dist[i+0]>=dist[i+1]);
-
- for (size_t i=0;i<hits-1;i++)
- {
- stackPtr->ptr = permuteExtract(dist,n0,n1);
- *(float*)&stackPtr->dist = permuteExtract(dist,tNear);
- dist = align_shift_right<1>(dist,dist);
- stackPtr++;
- }
- cur = permuteExtract(dist,n0,n1);
- }
-#endif
-
- public:
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- StackItemT<NodeRef>*& stackPtr,
- StackItemT<NodeRef>* stackEnd)
- {
- assert(mask != 0);
-#if defined(__AVX512ER__)
- traverseClosestHitAVX512<8,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#elif defined(__AVX512VL__)
- traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
-#else
-
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
- if (likely(mask == 0)) {
- assert(cur != BVH::emptyNode);
- return;
- }
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- const unsigned int d0 = ((unsigned int*)&tNear)[r];
- r = bscf(mask);
- NodeRef c1 = node->child(r);
- BVH::prefetch(c1,types);
- const unsigned int d1 = ((unsigned int*)&tNear)[r];
-
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- assert(stackPtr < stackEnd);
- if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
- else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
- }
-#if NEW_SORTING_CODE == 1
- vint4 s0((size_t)c0,(size_t)d0);
- vint4 s1((size_t)c1,(size_t)d1);
-
- r = bscf(mask);
- NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
- vint4 s2((size_t)c2,(size_t)d2);
- /* 3 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort3(s0,s1,s2);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
- cur = toSizeT(s2);
- stackPtr+=2;
- return;
- }
- r = bscf(mask);
- NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
- vint4 s3((size_t)c3,(size_t)d3);
- /* 4 hits */
- if (likely(mask == 0)) {
- StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
- cur = toSizeT(s3);
- stackPtr+=3;
- return;
- }
- *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
- /*! fallback case if more than 4 children are hit */
- StackItemT<NodeRef>* stackFirst = stackPtr;
- stackPtr+=4;
- while (1)
- {
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
- const vint4 s((size_t)c,(size_t)d);
- *(vint4*)stackPtr++ = s;
- assert(c != BVH::emptyNode);
- if (unlikely(mask == 0)) break;
- }
- sort(stackFirst,stackPtr);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#else
- /*! Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there. */
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
- assert(stackPtr < stackEnd);
- stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
-
- /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
-
- /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (likely(mask == 0)) {
- sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
- return;
- }
- /*! fallback case if more than 4 children are hit */
- StackItemT<NodeRef>* stackFirst = stackPtr-4;
- while (1)
- {
- assert(stackPtr < stackEnd);
- r = bscf(mask);
- c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
- assert(c != BVH::emptyNode);
- if (unlikely(mask == 0)) break;
- }
- sort(stackFirst,stackPtr);
- cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
-#endif
-#endif
- }
-
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t mask,
- const vfloat<Nx>& tNear,
- NodeRef*& stackPtr,
- NodeRef* stackEnd)
- {
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVH::prefetch(cur,types);
-
- /* simpler in sequence traversal order */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r); BVH::prefetch(cur,types);
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- assert(stackPtr < stackEnd);
- *stackPtr = cur; stackPtr++;
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h b/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h
deleted file mode 100644
index 9c603babf0..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-#include "../common/ray.h"
-#include "../common/stack_item.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, int Nx, int types>
- class BVHNNodeTraverserStreamHitCoherent
- {
- typedef BVHN<N> BVH;
- typedef typename BVH::NodeRef NodeRef;
- typedef typename BVH::BaseNode BaseNode;
-
- public:
- template<class T>
- static __forceinline void traverseClosestHit(NodeRef& cur,
- size_t& m_trav_active,
- const vbool<Nx>& vmask,
- const vfloat<Nx>& tNear,
- const T* const tMask,
- StackItemMaskCoherent*& stackPtr)
- {
- const NodeRef parent = cur;
- size_t mask = movemask(vmask);
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- const size_t r0 = bscf(mask);
- assert(r0 < 8);
- cur = node->child(r0);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r0];
- assert(cur != BVH::emptyNode);
- if (unlikely(mask == 0)) return;
-
- const unsigned int* const tNear_i = (unsigned int*)&tNear;
-
- /*! two children are hit, push far child, and continue with closer child */
- NodeRef c0 = cur;
- unsigned int d0 = tNear_i[r0];
- const size_t r1 = bscf(mask);
- assert(r1 < 8);
- NodeRef c1 = node->child(r1);
- BVHN<N>::prefetch(c1,types);
- unsigned int d1 = tNear_i[r1];
-
- assert(c0 != BVH::emptyNode);
- assert(c1 != BVH::emptyNode);
- if (likely(mask == 0)) {
- if (d0 < d1) {
- assert(tNear[r1] >= 0.0f);
- stackPtr->mask = tMask[r1];
- stackPtr->parent = parent;
- stackPtr->child = c1;
- stackPtr++;
- cur = c0;
- m_trav_active = tMask[r0];
- return;
- }
- else {
- assert(tNear[r0] >= 0.0f);
- stackPtr->mask = tMask[r0];
- stackPtr->parent = parent;
- stackPtr->child = c0;
- stackPtr++;
- cur = c1;
- m_trav_active = tMask[r1];
- return;
- }
- }
-
- /*! slow path for more than two hits */
- size_t hits = movemask(vmask);
- const vint<Nx> dist_i = select(vmask, (asInt(tNear) & 0xfffffff8) | vint<Nx>(step), 0);
- #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
- const vint<N> tmp = extractN<N,0>(dist_i);
- const vint<Nx> dist_i_sorted = usort_descending(tmp);
- #else
- const vint<Nx> dist_i_sorted = usort_descending(dist_i);
- #endif
- const vint<Nx> sorted_index = dist_i_sorted & 7;
-
- size_t i = 0;
- for (;;)
- {
- const unsigned int index = sorted_index[i];
- assert(index < 8);
- cur = node->child(index);
- m_trav_active = tMask[index];
- assert(m_trav_active);
- BVHN<N>::prefetch(cur,types);
- bscf(hits);
- if (unlikely(hits==0)) break;
- i++;
- assert(cur != BVH::emptyNode);
- assert(tNear[index] >= 0.0f);
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
- }
- }
-
- template<class T>
- static __forceinline void traverseAnyHit(NodeRef& cur,
- size_t& m_trav_active,
- const vbool<Nx>& vmask,
- const T* const tMask,
- StackItemMaskCoherent*& stackPtr)
- {
- const NodeRef parent = cur;
- size_t mask = movemask(vmask);
- assert(mask != 0);
- const BaseNode* node = cur.baseNode();
-
- /*! one child is hit, continue with that child */
- size_t r = bscf(mask);
- cur = node->child(r);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r];
-
- /* simple in order sequence */
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
-
- for (; ;)
- {
- r = bscf(mask);
- cur = node->child(r);
- BVHN<N>::prefetch(cur,types);
- m_trav_active = tMask[r];
- assert(cur != BVH::emptyNode);
- if (likely(mask == 0)) return;
- stackPtr->mask = m_trav_active;
- stackPtr->parent = parent;
- stackPtr->child = cur;
- stackPtr++;
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h
deleted file mode 100644
index a978c0c459..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bvh.h"
-
-namespace embree
-{
- namespace isa
- {
- struct NearFarPrecalculations
- {
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
-
- __forceinline NearFarPrecalculations() {}
-
- __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
- {
- const size_t size = sizeof(float)*N;
- nearX = (dir.x < 0.0f) ? 1*size : 0*size;
- nearY = (dir.y < 0.0f) ? 3*size : 2*size;
- nearZ = (dir.z < 0.0f) ? 5*size : 4*size;
- farX = nearX ^ size;
- farY = nearY ^ size;
- farZ = nearZ ^ size;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h
deleted file mode 100644
index aa0d4ba4d7..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h
+++ /dev/null
@@ -1,1788 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-#if defined(__AVX2__)
-#define __FMA_X4__
-#endif
-
-#if defined(__aarch64__)
-#define __FMA_X4__
-#endif
-
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray structure used in single-ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust>
- struct TravRayBase;
-
- /* Base (without tnear and tfar) */
- template<int N, int Nx>
- struct TravRayBase<N,Nx,false>
- {
- __forceinline TravRayBase() {}
-
- __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
- : org_xyz(ray_org), dir_xyz(ray_dir)
- {
- const Vec3fa ray_rdir = rcp_safe(ray_dir);
- org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
- dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
- rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
-#if defined(__FMA_X4__)
- const Vec3fa ray_org_rdir = ray_org*ray_rdir;
-#if !defined(__aarch64__)
- org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
-#else
- //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd
- //x86 will use msub
- neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z);
-#endif
-#endif
- nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
- nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
- nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
- farX = nearX ^ sizeof(vfloat<N>);
- farY = nearY ^ sizeof(vfloat<N>);
- farZ = nearZ ^ sizeof(vfloat<N>);
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
-
- }
-
- template<int K>
- __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- size_t flip = sizeof(vfloat<N>))
- {
- org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
- dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
- rdir = Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
-#if defined(__FMA_X4__)
-#if !defined(__aarch64__)
- org_rdir = org*rdir;
-#else
- neg_org_rdir = -(org*rdir);
-#endif
-#endif
- nearX = nearXYZ.x[k];
- nearY = nearXYZ.y[k];
- nearZ = nearXYZ.z[k];
- farX = nearX ^ flip;
- farY = nearY ^ flip;
- farZ = nearZ ^ flip;
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- Vec3fa org_xyz, dir_xyz;
- Vec3vf<Nx> org, dir, rdir;
-#if defined(__FMA_X4__)
-#if !defined(__aarch64__)
- Vec3vf<Nx> org_rdir;
-#else
- //aarch64 version are keeping negation of the org_rdir and use madd
- //x86 uses msub
- Vec3vf<Nx> neg_org_rdir;
-#endif
-#endif
-#if defined(__AVX512ER__) // KNL+
- vint16 permX, permY, permZ;
-#endif
-
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
- };
-
- /* Base (without tnear and tfar) */
- template<int N, int Nx>
- struct TravRayBase<N,Nx,true>
- {
- __forceinline TravRayBase() {}
-
- __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
- : org_xyz(ray_org), dir_xyz(ray_dir)
- {
- const float round_down = 1.0f-3.0f*float(ulp);
- const float round_up = 1.0f+3.0f*float(ulp);
- const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir);
- const Vec3fa ray_rdir_near = round_down*ray_rdir;
- const Vec3fa ray_rdir_far = round_up *ray_rdir;
- org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
- dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
- rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
- rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
- nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
- nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
- nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
- farX = nearX ^ sizeof(vfloat<N>);
- farY = nearY ^ sizeof(vfloat<N>);
- farZ = nearZ ^ sizeof(vfloat<N>);
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- template<int K>
- __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- size_t flip = sizeof(vfloat<N>))
- {
- const vfloat<Nx> round_down = 1.0f-3.0f*float(ulp);
- const vfloat<Nx> round_up = 1.0f+3.0f*float(ulp);
- org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
- dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
- rdir_near = round_down*Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
- rdir_far = round_up *Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
-
- nearX = nearXYZ.x[k];
- nearY = nearXYZ.y[k];
- nearZ = nearXYZ.z[k];
- farX = nearX ^ flip;
- farY = nearY ^ flip;
- farZ = nearZ ^ flip;
-
-#if defined(__AVX512ER__) // KNL+
- /* optimization works only for 8-wide BVHs with 16-wide SIMD */
- const vint<16> id(step);
- const vint<16> id2 = align_shift_right<16/2>(id, id);
- permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2);
- permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2);
- permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2);
-#endif
- }
-
- Vec3fa org_xyz, dir_xyz;
- Vec3vf<Nx> org, dir, rdir_near, rdir_far;
-#if defined(__AVX512ER__) // KNL+
- vint16 permX, permY, permZ;
-#endif
-
- size_t nearX, nearY, nearZ;
- size_t farX, farY, farZ;
- };
-
- /* Full (with tnear and tfar) */
- template<int N, int Nx, bool robust>
- struct TravRay : TravRayBase<N,Nx,robust>
- {
- __forceinline TravRay() {}
-
- __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
- : TravRayBase<N,Nx,robust>(ray_org, ray_dir),
- tnear(ray_tnear), tfar(ray_tfar) {}
-
- template<int K>
- __forceinline TravRay(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
- const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
- float ray_tnear, float ray_tfar,
- size_t flip = sizeof(vfloat<N>))
- : TravRayBase<N,Nx,robust>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip),
- tnear(ray_tnear), tfar(ray_tfar) {}
-
- vfloat<Nx> tnear;
- vfloat<Nx> tfar;
- };
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Point Query structure used in single-ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N>
- struct TravPointQuery
- {
- __forceinline TravPointQuery() {}
-
- __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
- {
- org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
- rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
- }
-
- __forceinline vfloat<N> const& tfar() const {
- return rad.x;
- }
-
- Vec3vf<N> org, rad;
- };
-
- //////////////////////////////////////////////////////////////////////////////////////
- // point query
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N>
- __forceinline size_t pointQuerySphereDistAndMask(
- const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
- vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
- {
- const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
- const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
- const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
- dist = vX * vX + vY * vY + vZ * vZ;
- const vbool<N> vmask = dist <= query.tfar()*query.tfar();
- const vbool<N> valid = minX <= maxX;
- return movemask(vmask) & movemask(valid);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
- const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
- const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
- const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
- const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
- const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
- const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
- const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
- const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
- const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
- const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
- const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
- const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
- const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
- const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
- const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
- const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
- size_t mask = pointQueryNodeSphere(node, query, time, dist);
-
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
- mask &= movemask(vmask);
- }
-
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> start_x(node->start.x);
- const vfloat<N> scale_x(node->scale.x);
- const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
- const vfloat<N> start_y(node->start.y);
- const vfloat<N> scale_y(node->scale.y);
- const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
- const vfloat<N> start_z(node->start.z);
- const vfloat<N> scale_z(node->scale.z);
- const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- const vfloat<N> minX = node->dequantizeLowerX(time);
- const vfloat<N> maxX = node->dequantizeUpperX(time);
- const vfloat<N> minY = node->dequantizeLowerY(time);
- const vfloat<N> maxY = node->dequantizeUpperY(time);
- const vfloat<N> minZ = node->dequantizeLowerZ(time);
- const vfloat<N> maxZ = node->dequantizeUpperZ(time);
- return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- // TODO: point query - implement
- const vbool<N> vmask = vbool<N>(true);
- const size_t mask = movemask(vmask) & ((1<<N)-1);
- dist = vfloat<N>(0.0f);
- return mask;
- }
-
- template<int N>
- __forceinline size_t pointQueryAABBDistAndMask(
- const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
- vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
- {
- const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
- const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
- const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
- dist = vX * vX + vY * vY + vZ * vZ;
- const vbool<N> valid = minX <= maxX;
- const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
- (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
- (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
- return movemask(vmask) & movemask(valid);
- }
-
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
- const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
- const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
- const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
- const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
- const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
- return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
- }
-
- // Point query against the child boxes of an AABBNodeMB at the given time.
- // Every bound is evaluated as madd(time, p[6], p[0]), i.e. the vector stored
- // six vfloat<N> slots after the base bound is blended in with `time`
- // (presumably time*p[6] + p[0]); the resulting boxes are then tested by
- // pointQueryAABBDistAndMask, which fills `dist` and produces the mask.
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
-   const vfloat<N>* const loX = (const vfloat<N>*)&node->lower_x;
-   const vfloat<N>* const hiX = (const vfloat<N>*)&node->upper_x;
-   const vfloat<N>* const loY = (const vfloat<N>*)&node->lower_y;
-   const vfloat<N>* const hiY = (const vfloat<N>*)&node->upper_y;
-   const vfloat<N>* const loZ = (const vfloat<N>*)&node->lower_z;
-   const vfloat<N>* const hiZ = (const vfloat<N>*)&node->upper_z;
-
-   const vfloat<N> boxMinX = madd(time,loX[6],vfloat<N>(loX[0]));
-   const vfloat<N> boxMaxX = madd(time,hiX[6],vfloat<N>(hiX[0]));
-   const vfloat<N> boxMinY = madd(time,loY[6],vfloat<N>(loY[0]));
-   const vfloat<N> boxMaxY = madd(time,hiY[6],vfloat<N>(hiY[0]));
-   const vfloat<N> boxMinZ = madd(time,loZ[6],vfloat<N>(loZ[0]));
-   const vfloat<N> boxMaxZ = madd(time,hiZ[6],vfloat<N>(hiZ[0]));
-
-   return pointQueryAABBDistAndMask(query, dist, boxMinX, boxMaxX, boxMinY, boxMaxY, boxMinZ, boxMaxZ);
- }
-
- // Point query against a motion-blur node reached through a NodeRef.
- // The spatial test is delegated to the AABBNodeMB overload of
- // pointQueryNodeAABB; when the reference is flagged as an AABBNodeMB4D the
- // result is further restricted to children with lower_t <= time < upper_t.
- template<int N>
- __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
-   const typename BVHN<N>::AABBNodeMB* const nodeMB = ref.getAABBNodeMB();
-   const size_t spatialHits = pointQueryNodeAABB(nodeMB, query, time, dist);
-
-   if (unlikely(ref.isAABBNodeMB4D())) {
-     const typename BVHN<N>::AABBNodeMB4D* const node4D = (const typename BVHN<N>::AABBNodeMB4D*) nodeMB;
-     const vbool<N> inTimeRange = (node4D->lower_t <= time) & (time < node4D->upper_t);
-     return spatialHits & movemask(inTimeRange);
-   }
-
-   return spatialHits;
- }
-
- // Point query against the child boxes of a QuantizedBaseNode.
- // Each bound is dequantized from slot k (index (k*sizeof(vfloat<N>)) >> 2,
- // k = 0..5 in the order minX,maxX,minY,maxY,minZ,maxZ) and mapped through
- // madd(q, scale, start) for its axis. The mask from
- // pointQueryAABBDistAndMask is ANDed with the node's valid mask.
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
-   const size_t slot = sizeof(vfloat<N>);
-   const size_t validBits = movemask(node->validMask());
-
-   // x axis
-   const vfloat<N> originX(node->start.x), stepX(node->scale.x);
-   const vfloat<N> loX = madd(node->template dequantize<N>((0*slot) >> 2),stepX,originX);
-   const vfloat<N> hiX = madd(node->template dequantize<N>((1*slot) >> 2),stepX,originX);
-
-   // y axis
-   const vfloat<N> originY(node->start.y), stepY(node->scale.y);
-   const vfloat<N> loY = madd(node->template dequantize<N>((2*slot) >> 2),stepY,originY);
-   const vfloat<N> hiY = madd(node->template dequantize<N>((3*slot) >> 2),stepY,originY);
-
-   // z axis
-   const vfloat<N> originZ(node->start.z), stepZ(node->scale.z);
-   const vfloat<N> loZ = madd(node->template dequantize<N>((4*slot) >> 2),stepZ,originZ);
-   const vfloat<N> hiZ = madd(node->template dequantize<N>((5*slot) >> 2),stepZ,originZ);
-
-   return pointQueryAABBDistAndMask(query, dist, loX, hiX, loY, hiY, loZ, hiZ) & validBits;
- }
-
- // Point query against the child boxes of a QuantizedBaseNodeMB at `time`.
- // The node's own dequantize{Lower,Upper}{X,Y,Z}(time) helpers supply the
- // bounds; the mask from pointQueryAABBDistAndMask is ANDed with the node's
- // valid mask before being returned.
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
-   const vfloat<N> loX = node->dequantizeLowerX(time);
-   const vfloat<N> hiX = node->dequantizeUpperX(time);
-   const vfloat<N> loY = node->dequantizeLowerY(time);
-   const vfloat<N> hiY = node->dequantizeUpperY(time);
-   const vfloat<N> loZ = node->dequantizeLowerZ(time);
-   const vfloat<N> hiZ = node->dequantizeUpperZ(time);
-
-   const size_t validBits = movemask(node->validMask());
-   return pointQueryAABBDistAndMask(query, dist, loX, hiX, loY, hiY, loZ, hiZ) & validBits;
- }
-
- // Placeholder point query for OBBNode: no geometric test is performed.
- // All of the low N mask bits are reported and every distance is set to 0;
- // `node` and `query` are not read.
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
-   // TODO: point query - implement
-   dist = vfloat<N>(0.0f);
-   const vbool<N> allLanes(true);
-   const size_t hits = movemask(allLanes) & ((1<<N)-1);
-   return hits;
- }
-
- // Placeholder point query for OBBNodeMB: no geometric test is performed.
- // All of the low N mask bits are reported and every distance is set to 0;
- // `node`, `query` and `time` are not read.
- template<int N>
- __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
-   // TODO: point query - implement
-   dist = vfloat<N>(0.0f);
-   const vbool<N> allLanes(true);
-   const size_t hits = movemask(allLanes) & ((1<<N)-1);
-   return hits;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust> // primary template for the AABBNode ray/box test; declaration only, bodies come from the explicit specializations that follow
- __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); // returns a child bitmask; dist is an output
-
- template<> // Fast (robust=false) slab test of one ray against the child boxes of a BVH4 AABBNode.
- __forceinline size_t intersectNode<4,4>(const typename BVH4::AABBNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) // returns the bitmask of children with tNear <= tFar; dist receives tNear
- { // ray.near*/far* are byte offsets added to &node->lower_x to pick the bound vector loaded for each plane
-#if defined(__FMA_X4__) // FMA path: one fused op per plane instead of subtract + multiply
-#if defined(__aarch64__) // aarch64: madd with ray.neg_org_rdir (presumably bound*rdir + neg_org_rdir)
- const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
-#else // other FMA targets: msub with ray.org_rdir (presumably bound*rdir - org_rdir)
- const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
- const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (bound - org) * rdir
- const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
- const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
- const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
- const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
- const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
- const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__aarch64__) // aarch64: maxi/mini reduction, then tNear/tFar compared through asInt()
- const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
- const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) > asInt(tFar); // inverted test: lanes set here are misses
- const size_t mask = movemask(vmask) ^ ((1<<4)-1); // flip the low 4 bits so set bits mean hits
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic fallback: float max/min and a float compare
- const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear; // per-child value of the near reduction, written for every lane regardless of the mask
- return mask;
- }
-
-#if defined(__AVX__)
-
- template<> // Fast (robust=false) slab test of one ray against the child boxes of a BVH8 AABBNode.
- __forceinline size_t intersectNode<8,8>(const typename BVH8::AABBNode* node, const TravRay<8,8,false>& ray, vfloat8& dist) // returns the bitmask of children with tNear <= tFar; dist receives tNear
- { // ray.near*/far* are byte offsets added to &node->lower_x to pick the bound vector loaded for each plane
-#if defined(__AVX2__) // fused multiply path
-#if defined(__aarch64__) // aarch64: madd with ray.neg_org_rdir (presumably bound*rdir + neg_org_rdir)
- const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
-#else // msub with ray.org_rdir (presumably bound*rdir - org_rdir)
- const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
- const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
-#endif
-
-#else // no AVX2: explicit (bound - org) * rdir
- const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
- const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
- const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
- const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
- const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
- const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) > asInt(tFar); // inverted test: lanes set here are misses
- const size_t mask = movemask(vmask) ^ ((1<<8)-1); // flip the low 8 bits so set bits mean hits
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic fallback: float max/min and a float compare
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear; // per-child value of the near reduction, written for every lane regardless of the mask
- return mask;
- }
-
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- // Fast (robust=false) slab test of a BVH4 AABBNode using 16-wide vectors
- // (this specialization sits inside the KNL-only preprocessor guard).
- // Each 4-wide bound vector, picked by a byte offset from &node->lower_x, is
- // converted to a vfloat16; the compare is issued through le() with a
- // vbool16(0xf) mask argument. Returns the resulting bitmask; dist receives
- // the near reduction.
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVH4::AABBNode* node, const TravRay<4,16,false>& ray, vfloat16& dist)
- {
-   const char* const bounds = (const char*)&node->lower_x;
-
-   const vfloat16 tEnterX = msub(vfloat16(*(vfloat4*)(bounds+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
-   const vfloat16 tEnterY = msub(vfloat16(*(vfloat4*)(bounds+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
-   const vfloat16 tEnterZ = msub(vfloat16(*(vfloat4*)(bounds+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
-   const vfloat16 tExitX = msub(vfloat16(*(vfloat4*)(bounds+ray.farX)), ray.rdir.x, ray.org_rdir.x);
-   const vfloat16 tExitY = msub(vfloat16(*(vfloat4*)(bounds+ray.farY)), ray.rdir.y, ray.org_rdir.y);
-   const vfloat16 tExitZ = msub(vfloat16(*(vfloat4*)(bounds+ray.farZ)), ray.rdir.z, ray.org_rdir.z);
-
-   const vfloat16 tEnter = max(tEnterX,tEnterY,tEnterZ,ray.tnear);
-   const vfloat16 tExit = min(tExitX,tExitY,tExitZ,ray.tfar);
-   const vbool16 hit = le(vbool16(0xf),tEnter,tExit);
-
-   dist = tEnter;
-   return movemask(hit);
- }
-
- template<> // Fast (robust=false) slab test of a BVH8 AABBNode using 16-wide vectors (inside the KNL-only guard).
- __forceinline size_t intersectNode<8,16>(const typename BVH8::AABBNode* node, const TravRay<8,16,false>& ray, vfloat16& dist) // returns a child bitmask; dist receives tNear
- {
- const vllong8 invalid((size_t)BVH8::emptyNode); // broadcast of the empty-node value
- const vboold8 m_valid(invalid != vllong8::loadu(node->children)); // children that differ from emptyNode
- const vfloat16 bminmaxX = permute(vfloat16::load((const float*)&node->lower_x), ray.permX); // one 16-wide load starting at lower_x, reordered by ray.permX; presumably near bounds end up in one half and far bounds in the other - TODO confirm
- const vfloat16 bminmaxY = permute(vfloat16::load((const float*)&node->lower_y), ray.permY);
- const vfloat16 bminmaxZ = permute(vfloat16::load((const float*)&node->lower_z), ray.permZ);
- const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); // max and min are both taken over the same combined vectors
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(vboolf16(m_valid),tNear,align_shift_right<8>(tFar, tFar)); // tFar is shifted by 8 lanes before the compare; m_valid is passed as the mask argument of le()
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- // Robust slab test of one ray against the child boxes of an AABBNode.
- // Unlike the fast variants, no fused multiply is used: each plane distance is
- // (bound - org) scaled by ray.rdir_near for entry planes and ray.rdir_far
- // for exit planes. Bound vectors are picked by byte offsets (ray.near*/far*)
- // from &node->lower_x. Returns the bitmask of children with
- // tNear <= tFar; dist receives the near reduction.
- template<int N, int Nx>
- __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist)
- {
-   const char* const bounds = (const char*)&node->lower_x;
-
-   const vfloat<N> tEnterX = (vfloat<N>::load((float*)(bounds+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
-   const vfloat<N> tEnterY = (vfloat<N>::load((float*)(bounds+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
-   const vfloat<N> tEnterZ = (vfloat<N>::load((float*)(bounds+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
-   const vfloat<N> tExitX = (vfloat<N>::load((float*)(bounds+ray.farX)) - ray.org.x) * ray.rdir_far.x;
-   const vfloat<N> tExitY = (vfloat<N>::load((float*)(bounds+ray.farY)) - ray.org.y) * ray.rdir_far.y;
-   const vfloat<N> tExitZ = (vfloat<N>::load((float*)(bounds+ray.farZ)) - ray.org.z) * ray.rdir_far.z;
-
-   const vfloat<N> tEnter = max(tEnterX,tEnterY,tEnterZ,ray.tnear);
-   const vfloat<N> tExit = min(tExitX,tExitY,tExitZ,ray.tfar);
-   const vbool<N> hit = tEnter <= tExit;
-
-   dist = tEnter;
-   return movemask(hit);
- }
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- // Robust slab test of a BVH4 AABBNode using 16-wide vectors (inside the
- // KNL-only guard). Each 4-wide bound vector is converted to a vfloat16,
- // offset by the ray origin and scaled by rdir_near / rdir_far; the compare
- // goes through le() with (1 << 4)-1 as its mask argument.
- // Returns the resulting bitmask; dist receives the near reduction.
- template<>
- __forceinline size_t intersectNodeRobust<4,16>(const typename BVHN<4>::AABBNode* node, const TravRay<4,16,true>& ray, vfloat<16>& dist)
- {
-   const char* const bounds = (const char*)&node->lower_x;
-
-   const vfloat16 tEnterX = (vfloat16(*(vfloat<4>*)(bounds+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
-   const vfloat16 tEnterY = (vfloat16(*(vfloat<4>*)(bounds+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
-   const vfloat16 tEnterZ = (vfloat16(*(vfloat<4>*)(bounds+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
-   const vfloat16 tExitX = (vfloat16(*(vfloat<4>*)(bounds+ray.farX)) - ray.org.x) * ray.rdir_far.x;
-   const vfloat16 tExitY = (vfloat16(*(vfloat<4>*)(bounds+ray.farY)) - ray.org.y) * ray.rdir_far.y;
-   const vfloat16 tExitZ = (vfloat16(*(vfloat<4>*)(bounds+ray.farZ)) - ray.org.z) * ray.rdir_far.z;
-
-   const vfloat16 tEnter = max(tEnterX,tEnterY,tEnterZ,ray.tnear);
-   const vfloat16 tExit = min(tExitX,tExitY,tExitZ,ray.tfar);
-   const vbool16 hit = le((1 << 4)-1,tEnter,tExit);
-
-   dist = tEnter;
-   return movemask(hit);
- }
-
- // Robust slab test of a BVH8 AABBNode using 16-wide vectors (inside the
- // KNL-only guard). Each 8-wide bound vector is converted to a vfloat16,
- // offset by the ray origin and scaled by rdir_near / rdir_far; the compare
- // goes through le() with (1 << 8)-1 as its mask argument.
- // Returns the resulting bitmask; dist receives the near reduction.
- template<>
- __forceinline size_t intersectNodeRobust<8,16>(const typename BVHN<8>::AABBNode* node, const TravRay<8,16,true>& ray, vfloat<16>& dist)
- {
-   const char* const bounds = (const char*)&node->lower_x;
-
-   const vfloat16 tEnterX = (vfloat16(*(vfloat<8>*)(bounds+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
-   const vfloat16 tEnterY = (vfloat16(*(vfloat<8>*)(bounds+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
-   const vfloat16 tEnterZ = (vfloat16(*(vfloat<8>*)(bounds+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
-   const vfloat16 tExitX = (vfloat16(*(vfloat<8>*)(bounds+ray.farX)) - ray.org.x) * ray.rdir_far.x;
-   const vfloat16 tExitY = (vfloat16(*(vfloat<8>*)(bounds+ray.farY)) - ray.org.y) * ray.rdir_far.y;
-   const vfloat16 tExitZ = (vfloat16(*(vfloat<8>*)(bounds+ray.farZ)) - ray.org.z) * ray.rdir_far.z;
-
-   const vfloat16 tEnter = max(tEnterX,tEnterY,tEnterZ,ray.tnear);
-   const vfloat16 tExit = min(tExitX,tExitY,tExitZ,ray.tfar);
-   const vbool16 hit = le((1 << 8)-1,tEnter,tExit);
-
-   dist = tEnter;
-   return movemask(hit);
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // Fast (robust=false) slab test of one ray against the child boxes of an AABBNodeMB evaluated at `time`.
- __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) // returns the child hit bitmask; dist receives tNear
- { // ray.near*/far* are byte offsets added to &node->lower_x to pick the bound vector used for each plane
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
-#if defined(__FMA_X4__) // FMA path; in every variant the bound at `time` is madd(time,p[6],p[0]), p[6] being six vfloat<N> slots past the base bound
-#if defined(__aarch64__) // aarch64: outer madd with ray.neg_org_rdir
- const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
-#else // other FMA targets: outer msub with ray.org_rdir
- const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (bound - org) * rdir
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
-#endif
-#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW
- const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool<N> vmask = asInt(tNear) > asInt(tFar); // inverted test: lanes set here are misses
- const size_t mask = movemask(vmask) ^ ((1<<N)-1); // flip the low N bits so set bits mean hits
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool<N> vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic fallback: float max/min and a float compare
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- const vbool<N> vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear; // per-child value of the near reduction, written for every lane regardless of the mask
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- // Robust slab test of one ray against the child boxes of an AABBNodeMB at
- // the given time. Each bound is first evaluated as madd(time, p[6], p[0])
- // (p[6] lies six vfloat<N> slots past the base bound), then offset by the ray
- // origin and scaled by rdir_near for entry planes or rdir_far for exit planes.
- // Returns the bitmask of children with tNear <= tFar; dist receives the
- // near reduction.
- template<int N>
- __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist)
- {
-   const char* const bounds = (const char*)&node->lower_x;
-
-   // entry planes
-   const vfloat<N>* const nearX = (const vfloat<N>*)(bounds+ray.nearX);
-   const vfloat<N>* const nearY = (const vfloat<N>*)(bounds+ray.nearY);
-   const vfloat<N>* const nearZ = (const vfloat<N>*)(bounds+ray.nearZ);
-   const vfloat<N> tEnterX = (madd(time,nearX[6],vfloat<N>(nearX[0])) - ray.org.x) * ray.rdir_near.x;
-   const vfloat<N> tEnterY = (madd(time,nearY[6],vfloat<N>(nearY[0])) - ray.org.y) * ray.rdir_near.y;
-   const vfloat<N> tEnterZ = (madd(time,nearZ[6],vfloat<N>(nearZ[0])) - ray.org.z) * ray.rdir_near.z;
-   const vfloat<N> tEnter = max(ray.tnear,tEnterX,tEnterY,tEnterZ);
-
-   // exit planes
-   const vfloat<N>* const farX = (const vfloat<N>*)(bounds+ray.farX);
-   const vfloat<N>* const farY = (const vfloat<N>*)(bounds+ray.farY);
-   const vfloat<N>* const farZ = (const vfloat<N>*)(bounds+ray.farZ);
-   const vfloat<N> tExitX = (madd(time,farX[6],vfloat<N>(farX[0])) - ray.org.x) * ray.rdir_far.x;
-   const vfloat<N> tExitY = (madd(time,farY[6],vfloat<N>(farY[0])) - ray.org.y) * ray.rdir_far.y;
-   const vfloat<N> tExitZ = (madd(time,farZ[6],vfloat<N>(farZ[0])) - ray.org.z) * ray.rdir_far.z;
-   const vfloat<N> tExit = min(ray.tfar,tExitX,tExitY,tExitZ);
-
-   dist = tEnter;
-   return movemask(tEnter <= tExit);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N> // Fast (robust=false) slab test for a motion-blur node reached through a NodeRef, with an extra per-child time-range test for AABBNodeMB4D nodes.
- __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) // returns the child hit bitmask; dist receives tNear
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
-
- const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); // ray.near*/far* are byte offsets from &node->lower_x
- const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
- const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
- const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
- const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
- const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
-#if defined (__FMA_X4__) // FMA path; in every variant the bound at `time` is madd(time,p[6],p[0])
-#if defined(__aarch64__) // aarch64: outer madd with ray.neg_org_rdir
- const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
-#else // other FMA targets: outer msub with ray.org_rdir
- const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (bound - org) * rdir
- const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
-#endif
-#if defined(__FMA_X4__) && !defined(__AVX512F__) // pairwise maxi/mini reduction on this path
- const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
- const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
-#else // float max/min over all four operands
- const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
- const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
-#endif
- vbool<N> vmask = tNear <= tFar; // float compare on every path of this function
- if (unlikely(ref.isAABBNodeMB4D())) { // 4D nodes additionally carry a time interval per child
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- vmask &= (node1->lower_t <= time) & (time < node1->upper_t); // keep children with lower_t <= time < upper_t
- }
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- // Robust slab test for a motion-blur node reached through a NodeRef.
- // Bounds are evaluated as madd(time, p[6], p[0]), offset by the ray origin
- // and scaled by rdir_near (entry planes) or rdir_far (exit planes). When the
- // reference is flagged as an AABBNodeMB4D, only children with
- // lower_t <= time < upper_t stay in the mask.
- // Returns the child bitmask; dist receives the near reduction.
- template<int N>
- __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist)
- {
-   const typename BVHN<N>::AABBNodeMB* const nodeMB = ref.getAABBNodeMB();
-   const char* const bounds = (const char*)&nodeMB->lower_x;
-
-   // entry planes
-   const vfloat<N>* const nearX = (const vfloat<N>*)(bounds+ray.nearX);
-   const vfloat<N>* const nearY = (const vfloat<N>*)(bounds+ray.nearY);
-   const vfloat<N>* const nearZ = (const vfloat<N>*)(bounds+ray.nearZ);
-   const vfloat<N> tEnterX = (madd(time,nearX[6],vfloat<N>(nearX[0])) - ray.org.x) * ray.rdir_near.x;
-   const vfloat<N> tEnterY = (madd(time,nearY[6],vfloat<N>(nearY[0])) - ray.org.y) * ray.rdir_near.y;
-   const vfloat<N> tEnterZ = (madd(time,nearZ[6],vfloat<N>(nearZ[0])) - ray.org.z) * ray.rdir_near.z;
-   const vfloat<N> tEnter = max(ray.tnear,tEnterX,tEnterY,tEnterZ);
-
-   // exit planes
-   const vfloat<N>* const farX = (const vfloat<N>*)(bounds+ray.farX);
-   const vfloat<N>* const farY = (const vfloat<N>*)(bounds+ray.farY);
-   const vfloat<N>* const farZ = (const vfloat<N>*)(bounds+ray.farZ);
-   const vfloat<N> tExitX = (madd(time,farX[6],vfloat<N>(farX[0])) - ray.org.x) * ray.rdir_far.x;
-   const vfloat<N> tExitY = (madd(time,farY[6],vfloat<N>(farY[0])) - ray.org.y) * ray.rdir_far.y;
-   const vfloat<N> tExitZ = (madd(time,farZ[6],vfloat<N>(farZ[0])) - ray.org.z) * ray.rdir_far.z;
-   const vfloat<N> tExit = min(ray.tfar,tExitX,tExitY,tExitZ);
-
-   vbool<N> hit = tEnter <= tExit;
-   if (unlikely(ref.isAABBNodeMB4D())) {
-     const typename BVHN<N>::AABBNodeMB4D* const node4D = (const typename BVHN<N>::AABBNodeMB4D*) nodeMB;
-     hit &= (node4D->lower_t <= time) & (time < node4D->upper_t);
-   }
-
-   dist = tEnter;
-   return movemask(hit);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast QuantizedBaseNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, bool robust> // primary template for the QuantizedBaseNode ray/box test; declaration only, bodies come from the explicit specializations that follow
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); // returns a child bitmask; dist is an output
-
- template<> // Fast (robust=false) slab test of one ray against the child boxes of a BVH4 QuantizedBaseNode.
- __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) // returns hit bitmask ANDed with the node's valid mask; dist receives tNear
- {
- const size_t mvalid = movemask(node->validMask()); // applied to the result at the end
- const vfloat4 start_x(node->start.x);
- const vfloat4 scale_x(node->scale.x);
- const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); // dequantize index is the ray offset shifted right by 2; the value is then mapped via madd(q,scale,start)
- const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x); // "lower"/"upper" here are the near/far bounds selected by the ray offsets
- const vfloat4 start_y(node->start.y);
- const vfloat4 scale_y(node->scale.y);
- const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
- const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
- const vfloat4 start_z(node->start.z);
- const vfloat4 scale_z(node->scale.z);
- const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
-
-#if defined(__FMA_X4__) // FMA path: one fused op per plane
-#if defined(__aarch64__) // aarch64: madd with ray.neg_org_rdir
- const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else // other FMA targets: msub with ray.org_rdir
- const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else // no FMA: explicit (bound - org) * rdir
- const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW; NOTE(review): && binds tighter than ||, so the !__AVX512F__ test applies only to the __SSE4_1__ term
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) > asInt(tFar); // inverted test: lanes set here are misses
- const size_t mask = movemask(vmask) ^ ((1<<4)-1); // flip the low 4 bits so set bits mean hits
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else // generic fallback: float max/min and a float compare
- const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool4 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask & mvalid; // drop children not flagged in the node's valid mask
- }
-
- // Robust slab test of one ray against the child boxes of a BVH4
- // QuantizedBaseNode. Near/far bounds are dequantized at index
- // (ray offset >> 2) and mapped through madd(q, scale, start) per axis, then
- // offset by the ray origin and scaled by rdir_near / rdir_far.
- // Returns the bitmask of children with tNear <= tFar, ANDed with the node's
- // valid mask; dist receives the near reduction.
- template<>
- __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,true>& ray, vfloat4& dist)
- {
-   const size_t validBits = movemask(node->validMask());
-
-   // x slabs
-   const vfloat4 originX(node->start.x), stepX(node->scale.x);
-   const vfloat4 nearX = madd(node->dequantize<4>(ray.nearX >> 2),stepX,originX);
-   const vfloat4 farX = madd(node->dequantize<4>(ray.farX >> 2),stepX,originX);
-
-   // y slabs
-   const vfloat4 originY(node->start.y), stepY(node->scale.y);
-   const vfloat4 nearY = madd(node->dequantize<4>(ray.nearY >> 2),stepY,originY);
-   const vfloat4 farY = madd(node->dequantize<4>(ray.farY >> 2),stepY,originY);
-
-   // z slabs
-   const vfloat4 originZ(node->start.z), stepZ(node->scale.z);
-   const vfloat4 nearZ = madd(node->dequantize<4>(ray.nearZ >> 2),stepZ,originZ);
-   const vfloat4 farZ = madd(node->dequantize<4>(ray.farZ >> 2),stepZ,originZ);
-
-   const vfloat4 tEnterX = (nearX - ray.org.x) * ray.rdir_near.x;
-   const vfloat4 tEnterY = (nearY - ray.org.y) * ray.rdir_near.y;
-   const vfloat4 tEnterZ = (nearZ - ray.org.z) * ray.rdir_near.z;
-   const vfloat4 tExitX = (farX - ray.org.x) * ray.rdir_far.x;
-   const vfloat4 tExitY = (farY - ray.org.y) * ray.rdir_far.y;
-   const vfloat4 tExitZ = (farZ - ray.org.z) * ray.rdir_far.z;
-
-   const vfloat4 tEnter = max(tEnterX,tEnterY,tEnterZ,ray.tnear);
-   const vfloat4 tExit = min(tExitX,tExitY,tExitZ,ray.tfar);
-   const vbool4 hit = tEnter <= tExit;
-
-   dist = tEnter;
-   return movemask(hit) & validBits;
- }
-
-
-#if defined(__AVX__)
-
- template<>
- __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,false>& ray, vfloat8& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat8 start_x(node->start.x);
- const vfloat8 scale_x(node->scale.x);
- const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
- const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
- const vfloat8 start_y(node->start.y);
- const vfloat8 scale_y(node->scale.y);
- const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
- const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
- const vfloat8 start_z(node->start.z);
- const vfloat8 scale_z(node->scale.z);
- const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
-
-#if defined(__AVX2__)
-#if defined(__aarch64__)
- const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else
- const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) > asInt(tFar);
- const size_t mask = movemask(vmask) ^ ((1<<8)-1);
-#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = asInt(tNear) <= asInt(tFar);
- const size_t mask = movemask(vmask);
-#else
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-#endif
- dist = tNear;
- return mask & mvalid;
- }
-
- template<>
- __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,true>& ray, vfloat8& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat8 start_x(node->start.x);
- const vfloat8 scale_x(node->scale.x);
- const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
- const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
- const vfloat8 start_y(node->start.y);
- const vfloat8 scale_y(node->scale.y);
- const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
- const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
- const vfloat8 start_z(node->start.z);
- const vfloat8 scale_z(node->scale.z);
- const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
- const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
-
- const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool8 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask);
-
- dist = tNear;
- return mask & mvalid;
- }
-
-
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,false>& ray, vfloat16& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 start_x(node->start.x);
- const vfloat16 scale_x(node->scale.x);
- const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x);
- const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x);
- const vfloat16 start_y(node->start.y);
- const vfloat16 scale_y(node->scale.y);
- const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y);
- const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y);
- const vfloat16 start_z(node->start.z);
- const vfloat16 scale_z(node->scale.z);
- const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z);
- const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z);
-
- const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le(vbool16(0xf),tNear,tFar);
- const size_t mask = movemask(vmask) & mvalid;
- dist = tNear;
- return mask;
- }
-
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,true>& ray, vfloat16& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 start_x(node->start.x);
- const vfloat16 scale_x(node->scale.x);
- const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x);
- const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x);
- const vfloat16 start_y(node->start.y);
- const vfloat16 scale_y(node->scale.y);
- const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y);
- const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y);
- const vfloat16 start_z(node->start.z);
- const vfloat16 scale_z(node->scale.z);
- const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z);
- const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z);
-
- const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
- const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
- const vbool16 vmask = le(vbool16(0xf),tNear,tFar);
- const size_t mask = movemask(vmask) & mvalid;
- dist = tNear;
- return mask;
- }
-
- template<>
- __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,false>& ray, vfloat16& dist)
- {
- const vbool16 m_valid(node->validMask16());
- const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX);
- const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY);
- const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ);
- const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear);
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar));
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<>
- __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,true>& ray, vfloat16& dist)
- {
- const vbool16 m_valid(node->validMask16());
- const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX);
- const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY);
- const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ);
- const vfloat16 tNearFarX = (bminmaxX - ray.org.x) * ray.rdir_far.x; // FIXME: this is not conservative !!!!!!!!!
- const vfloat16 tNearFarY = (bminmaxY - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tNearFarZ = (bminmaxZ - ray.org.z) * ray.rdir_far.z;
- const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear);
- const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar);
- const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar));
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-
-#endif
-
-
- template<int N, int Nx>
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist)
- {
- const vboolf<N> mvalid = node->validMask();
- const vfloat<N> lower_x = node->dequantizeLowerX(time);
- const vfloat<N> upper_x = node->dequantizeUpperX(time);
- const vfloat<N> lower_y = node->dequantizeLowerY(time);
- const vfloat<N> upper_y = node->dequantizeUpperY(time);
- const vfloat<N> lower_z = node->dequantizeLowerZ(time);
- const vfloat<N> upper_z = node->dequantizeUpperZ(time);
-#if defined(__FMA_X4__)
-#if defined(__aarch64__)
- const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#endif
-#else
- const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
-#endif
-
- const vfloat<N> tminX = mini(tNearX,tFarX);
- const vfloat<N> tmaxX = maxi(tNearX,tFarX);
- const vfloat<N> tminY = mini(tNearY,tFarY);
- const vfloat<N> tmaxY = maxi(tNearY,tFarY);
- const vfloat<N> tminZ = mini(tNearZ,tFarZ);
- const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
- const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
- const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
-#else
- const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
-#endif
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
- template<int N, int Nx>
- __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist)
- {
- const vboolf<N> mvalid = node->validMask();
- const vfloat<N> lower_x = node->dequantizeLowerX(time);
- const vfloat<N> upper_x = node->dequantizeUpperX(time);
- const vfloat<N> lower_y = node->dequantizeLowerY(time);
- const vfloat<N> upper_y = node->dequantizeUpperY(time);
- const vfloat<N> lower_z = node->dequantizeLowerZ(time);
- const vfloat<N> upper_z = node->dequantizeUpperZ(time);
- const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat<N> tminX = mini(tNearX,tFarX);
- const vfloat<N> tmaxX = maxi(tNearX,tFarX);
- const vfloat<N> tminY = mini(tNearY,tFarY);
- const vfloat<N> tmaxY = maxi(tNearY,tFarY);
- const vfloat<N> tminZ = mini(tNearZ,tFarZ);
- const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
- const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
- const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
-#else
- const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
-#endif
- const size_t mask = movemask(vmask);
- dist = tNear;
- return mask;
- }
-
-
-#if defined(__AVX512ER__)
- // for KNL
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,false>& ray, const float time, vfloat<4>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 lower_x = node->dequantizeLowerX(time);
- const vfloat16 upper_x = node->dequantizeUpperX(time);
- const vfloat16 lower_y = node->dequantizeLowerY(time);
- const vfloat16 upper_y = node->dequantizeUpperY(time);
- const vfloat16 lower_z = node->dequantizeLowerZ(time);
- const vfloat16 upper_z = node->dequantizeUpperZ(time);
-
- const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-
- const vfloat16 tminX = min(tNearX,tFarX);
- const vfloat16 tmaxX = max(tNearX,tFarX);
- const vfloat16 tminY = min(tNearY,tFarY);
- const vfloat16 tmaxY = max(tNearY,tFarY);
- const vfloat16 tminZ = min(tNearZ,tFarZ);
- const vfloat16 tmaxZ = max(tNearZ,tFarZ);
- const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear);
- const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar );
- const vbool16 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask) & mvalid;
- dist = extractN<4,0>(tNear);
- return mask;
- }
-
-
- // for KNL
- template<>
- __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,true>& ray, const float time, vfloat<4>& dist)
- {
- const size_t mvalid = movemask(node->validMask());
- const vfloat16 lower_x = node->dequantizeLowerX(time);
- const vfloat16 upper_x = node->dequantizeUpperX(time);
- const vfloat16 lower_y = node->dequantizeLowerY(time);
- const vfloat16 upper_y = node->dequantizeUpperY(time);
- const vfloat16 lower_z = node->dequantizeLowerZ(time);
- const vfloat16 upper_z = node->dequantizeUpperZ(time);
-
- const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
- const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
- const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
- const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
- const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
- const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
-
- const vfloat16 tminX = min(tNearX,tFarX);
- const vfloat16 tmaxX = max(tNearX,tFarX);
- const vfloat16 tminY = min(tNearY,tFarY);
- const vfloat16 tmaxY = max(tNearY,tFarY);
- const vfloat16 tminZ = min(tNearZ,tFarZ);
- const vfloat16 tmaxZ = max(tNearZ,tFarZ);
- const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear);
- const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar );
- const vbool16 vmask = tNear <= tFar;
- const size_t mask = movemask(vmask) & mvalid;
- dist = extractN<4,0>(tNear);
- return mask;
- }
-
-#endif
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, bool robust>
- __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,N,robust>& ray, vfloat<N>& dist)
- {
- const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
- //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
- const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
- const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
- const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
- const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
-
- const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
- const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
- vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
- vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- if (robust) {
- tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
- tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
- }
- const vbool<N> vmask = tNear <= tFar;
- dist = tNear;
- return movemask(vmask);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, bool robust>
- __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,N,robust>& ray, const float time, vfloat<N>& dist)
- {
- const AffineSpace3vf<N> xfm = node->space0;
- const Vec3vf<N> b0_lower = zero;
- const Vec3vf<N> b0_upper = one;
- const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
- const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
-
- const BBox3vf<N> bounds(lower,upper);
- const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
- const Vec3vf<N> rdir = rcp_safe(dir);
- const Vec3vf<N> org = xfmPoint(xfm,ray.org);
-
- const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
- const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
-
- const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
- const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
- const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
- const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
- vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
- vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
- if (robust) {
- tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
- tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
- }
- const vbool<N> vmask = tNear <= tFar;
- dist = tNear;
- return movemask(vmask);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in point query raversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Computes traversal information for N nodes with 1 point query */
- template<int N, int types>
- struct BVHNNodePointQuerySphere1;
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
- else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
- else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQuerySphere1<N, BVH_QN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNQuantizedBaseNodePointQuerySphere1
- {
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- return pointQueryNodeSphere(node,query,dist);
- }
-
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- return pointQueryNodeSphere(node,query,time,dist);
- }
- };
-
- /*! Computes traversal information for N nodes with 1 point query */
- template<int N, int types>
- struct BVHNNodePointQueryAABB1;
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
- else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
- else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNNodePointQueryAABB1<N, BVH_QN1>
- {
- static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
- return true;
- }
- };
-
- template<int N>
- struct BVHNQuantizedBaseNodePointQueryAABB1
- {
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
- {
- return pointQueryNodeAABB(node,query,dist);
- }
-
- static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
- {
- return pointQueryNodeAABB(node,query,time,dist);
- }
- };
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in ray traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Intersects N nodes with 1 ray */
- template<int N, int Nx, int types, bool robust>
- struct BVHNNodeIntersector1;
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode(node.getAABBNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeMB4D<N>(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
- else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
- else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
- else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else return false;
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else mask = intersectNodeMB4D(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
- else mask = intersectNodeMB4DRobust(node, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_QN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
- return true;
- }
- };
-
- template<int N, int Nx>
- struct BVHNNodeIntersector1<N, Nx, BVH_QN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask)
- {
- if (unlikely(node.isLeaf())) return false;
- mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
- return true;
- }
- };
-
- /*! Intersects N nodes with K rays */
- template<int N, int Nx, bool robust>
- struct BVHNQuantizedBaseNodeIntersector1;
-
- template<int N, int Nx>
- struct BVHNQuantizedBaseNodeIntersector1<N, Nx, false>
- {
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,false>& ray, vfloat<Nx>& dist)
- {
- return intersectNode(node,ray,dist);
- }
-
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist)
- {
- return intersectNode(node,ray,time,dist);
- }
-
- };
-
- template<int N, int Nx>
- struct BVHNQuantizedBaseNodeIntersector1<N, Nx, true>
- {
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist)
- {
- return intersectNode(node,ray,dist);
- }
-
- static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist)
- {
- return intersectNode(node,ray,time,dist);
- }
-
- };
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h
deleted file mode 100644
index 800ac8b478..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Frustum structure used in hybrid and stream traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*
- Optimized frustum test. We calculate t=(p-org)/dir in ray/box
- intersection. We assume the rays are split by octant, thus
- dir intervals are either positive or negative in each
- dimension.
-
- Case 1: dir.min >= 0 && dir.max >= 0:
- t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
- t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
-
- Case 2: dir.min < 0 && dir.max < 0:
- t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
- t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
- */
-
- template<bool robust>
- struct Frustum;
-
- /* Fast variant */
- template<>
- struct Frustum<false>
- {
- __forceinline Frustum() {}
-
- template<int K>
- __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(valid, org, rdir, ray_tnear, ray_tfar, N);
- }
-
- template<int K>
- __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
- reduce_min(select(valid, org.y, pos_inf)),
- reduce_min(select(valid, org.z, pos_inf)));
-
- const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
- reduce_max(select(valid, org.y, neg_inf)),
- reduce_max(select(valid, org.z, neg_inf)));
-
- const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
- reduce_min(select(valid, rdir.y, pos_inf)),
- reduce_min(select(valid, rdir.z, pos_inf)));
-
- const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
- reduce_max(select(valid, rdir.y, neg_inf)),
- reduce_max(select(valid, rdir.z, neg_inf)));
-
- const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
- const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
-
- init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
- }
-
- __forceinline void init(const Vec3fa& reduced_min_org,
- const Vec3fa& reduced_max_org,
- const Vec3fa& reduced_min_rdir,
- const Vec3fa& reduced_max_rdir,
- float reduced_min_dist,
- float reduced_max_dist,
- int N)
- {
- const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
-
- min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
- max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
-
-#if defined (__aarch64__)
- neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
- neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
-#else
- min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
- max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
-#endif
- min_dist = reduced_min_dist;
- max_dist = reduced_max_dist;
-
- nf = NearFarPrecalculations(min_rdir, N);
- }
-
- template<int K>
- __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
- {
- max_dist = reduce_max(ray_tfar);
- }
-
- NearFarPrecalculations nf;
-
- Vec3fa min_rdir;
- Vec3fa max_rdir;
-
-#if defined (__aarch64__)
- Vec3fa neg_min_org_rdir;
- Vec3fa neg_max_org_rdir;
-#else
- Vec3fa min_org_rdir;
- Vec3fa max_org_rdir;
-#endif
- float min_dist;
- float max_dist;
- };
-
- typedef Frustum<false> FrustumFast;
-
- /* Robust variant */
- template<>
- struct Frustum<true>
- {
- __forceinline Frustum() {}
-
- template<int K>
- __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(valid, org, rdir, ray_tnear, ray_tfar, N);
- }
-
- template<int K>
- __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
- reduce_min(select(valid, org.y, pos_inf)),
- reduce_min(select(valid, org.z, pos_inf)));
-
- const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
- reduce_max(select(valid, org.y, neg_inf)),
- reduce_max(select(valid, org.z, neg_inf)));
-
- const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
- reduce_min(select(valid, rdir.y, pos_inf)),
- reduce_min(select(valid, rdir.z, pos_inf)));
-
- const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
- reduce_max(select(valid, rdir.y, neg_inf)),
- reduce_max(select(valid, rdir.z, neg_inf)));
-
- const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
- const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
-
- init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
- }
-
- __forceinline void init(const Vec3fa& reduced_min_org,
- const Vec3fa& reduced_max_org,
- const Vec3fa& reduced_min_rdir,
- const Vec3fa& reduced_max_rdir,
- float reduced_min_dist,
- float reduced_max_dist,
- int N)
- {
- const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
- min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
- max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
-
- min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
- max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
-
- min_dist = reduced_min_dist;
- max_dist = reduced_max_dist;
-
- nf = NearFarPrecalculations(min_rdir, N);
- }
-
- template<int K>
- __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
- {
- max_dist = reduce_max(ray_tfar);
- }
-
- NearFarPrecalculations nf;
-
- Vec3fa min_rdir;
- Vec3fa max_rdir;
-
- Vec3fa min_org;
- Vec3fa max_org;
-
- float min_dist;
- float max_dist;
- };
-
- typedef Frustum<true> FrustumRobust;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx>
- __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
- const FrustumFast& frustum, vfloat<Nx>& dist)
- {
- const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
- const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
- const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
- const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
- const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
- const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
-
-#if defined (__aarch64__)
- const vfloat<Nx> fminX = madd(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.neg_min_org_rdir.x));
- const vfloat<Nx> fminY = madd(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.neg_min_org_rdir.y));
- const vfloat<Nx> fminZ = madd(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.neg_min_org_rdir.z));
- const vfloat<Nx> fmaxX = madd(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.neg_max_org_rdir.x));
- const vfloat<Nx> fmaxY = madd(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.neg_max_org_rdir.y));
- const vfloat<Nx> fmaxZ = madd(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.neg_max_org_rdir.z));
-#else
- const vfloat<Nx> fminX = msub(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.min_org_rdir.x));
- const vfloat<Nx> fminY = msub(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.min_org_rdir.y));
- const vfloat<Nx> fminZ = msub(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.min_org_rdir.z));
- const vfloat<Nx> fmaxX = msub(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.max_org_rdir.x));
- const vfloat<Nx> fmaxY = msub(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.max_org_rdir.y));
- const vfloat<Nx> fmaxZ = msub(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.max_org_rdir.z));
-#endif
- const vfloat<Nx> fmin = maxi(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist));
- dist = fmin;
- const vfloat<Nx> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist));
- const vbool<Nx> vmask_node_hit = fmin <= fmax;
- size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
- return m_node;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx>
- __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
- const FrustumRobust& frustum, vfloat<Nx>& dist)
- {
- const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
- const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
- const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
- const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
- const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
- const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
-
- const vfloat<Nx> fminX = (bminX - vfloat<Nx>(frustum.min_org.x)) * vfloat<Nx>(frustum.min_rdir.x);
- const vfloat<Nx> fminY = (bminY - vfloat<Nx>(frustum.min_org.y)) * vfloat<Nx>(frustum.min_rdir.y);
- const vfloat<Nx> fminZ = (bminZ - vfloat<Nx>(frustum.min_org.z)) * vfloat<Nx>(frustum.min_rdir.z);
- const vfloat<Nx> fmaxX = (bmaxX - vfloat<Nx>(frustum.max_org.x)) * vfloat<Nx>(frustum.max_rdir.x);
- const vfloat<Nx> fmaxY = (bmaxY - vfloat<Nx>(frustum.max_org.y)) * vfloat<Nx>(frustum.max_rdir.y);
- const vfloat<Nx> fmaxZ = (bmaxZ - vfloat<Nx>(frustum.max_org.z)) * vfloat<Nx>(frustum.max_rdir.z);
-
- const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
- const float round_up = 1.0f+2.0f*float(ulp);
- const vfloat<Nx> fmin = max(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist));
- dist = fmin;
- const vfloat<Nx> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist));
- const vbool<Nx> vmask_node_hit = (round_down*fmin <= round_up*fmax);
- size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
- return m_node;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h
deleted file mode 100644
index 0543e56f8e..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h
+++ /dev/null
@@ -1,843 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray packet structure used in hybrid traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int K, bool robust>
- struct TravRayK;
-
- /* Fast variant */
- template<int K>
- struct TravRayK<K, false>
- {
- __forceinline TravRayK() {}
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- init(ray_org, ray_dir, N);
- }
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(ray_org, ray_dir, N);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- org = ray_org;
- dir = ray_dir;
- rdir = rcp_safe(ray_dir);
-#if defined(__aarch64__)
- neg_org_rdir = -(org * rdir);
-#elif defined(__AVX2__)
- org_rdir = org * rdir;
-#endif
- if (N)
- {
- const int size = sizeof(float)*N;
- nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
- nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
- nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
- }
- }
-
- Vec3vf<K> org;
- Vec3vf<K> dir;
- Vec3vf<K> rdir;
-#if defined(__aarch64__)
- Vec3vf<K> neg_org_rdir;
-#elif defined(__AVX2__)
- Vec3vf<K> org_rdir;
-#endif
- Vec3vi<K> nearXYZ;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKFast = TravRayK<K, false>;
-
- /* Robust variant */
- template<int K>
- struct TravRayK<K, true>
- {
- __forceinline TravRayK() {}
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- init(ray_org, ray_dir, N);
- }
-
- __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
- {
- init(ray_org, ray_dir, N);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
- {
- org = ray_org;
- dir = ray_dir;
- rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
-
- if (N)
- {
- const int size = sizeof(float)*N;
- nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
- nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
- nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
- }
- }
-
- Vec3vf<K> org;
- Vec3vf<K> dir;
- Vec3vf<K> rdir;
- Vec3vi<K> nearXYZ;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKRobust = TravRayK<K, true>;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
- const TravRayKFast<K>& ray, vfloat<K>& dist)
-
- {
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
- #else
- const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
- #endif
-
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
- #endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- #else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- #endif
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
- const TravRayKRobust<K>& ray, vfloat<K>& dist)
- {
- // FIXME: use per instruction rounding for AVX512
- const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
- const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-#endif
-
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
-#endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
-#else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
-#endif
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
-#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
- else
-#endif
- {
- const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
-
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-#endif
-
- const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
- vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
- }
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNodeMB4D intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
-
- const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
- const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
- const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
- const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
- const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
- const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
-
- const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
- const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
- vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
-
- if (unlikely(ref.isAABBNodeMB4D())) {
- const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
- lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
- }
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K, bool robust>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
- const TravRayK<K,robust>& ray, vfloat<K>& dist)
- {
- const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
- Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
- Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
- Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
-
- const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
- const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
- const Vec3vf<K> org = xfmPoint(naabb, ray.org);
-
- const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
- const vfloat<K> lclipMinY = org.y * nrdir.y;
- const vfloat<K> lclipMinZ = org.z * nrdir.z;
- const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
- const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
- const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
-
- vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- if (robust) {
- lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
- lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
- }
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast OBBNodeMB intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K, bool robust>
- __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
- const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
- Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
- Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
- Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
-
- const Vec3vf<K> b0_lower = zero;
- const Vec3vf<K> b0_upper = one;
- const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
- const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
- const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
- const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
-
- const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
- const Vec3vf<K> rdir = rcp_safe(dir);
- const Vec3vf<K> org = xfmPoint(xfm, ray.org);
-
- const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
- const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
- const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
- const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
- const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
- const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
-
- vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- if (robust) {
- lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
- lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
- }
-
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // QuantizedBaseNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
- const TravRayK<K,false>& ray, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
- const vfloat<N> lower_x = node->dequantizeLowerX();
- const vfloat<N> upper_x = node->dequantizeUpperX();
- const vfloat<N> lower_y = node->dequantizeLowerY();
- const vfloat<N> upper_y = node->dequantizeUpperY();
- const vfloat<N> lower_z = node->dequantizeLowerZ();
- const vfloat<N> upper_z = node->dequantizeUpperZ();
-
- #if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
- #elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
- #else
- const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
- #endif
-
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- if (K == 16)
- {
- /* use mixed float/int min/max */
- const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- dist = lnearP;
- return lhit;
- }
- else
- #endif
- {
- const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
- #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX
- const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
- #else
- const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
- #endif
- dist = lnearP;
- return lhit;
- }
- }
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
- const TravRayK<K,true>& ray, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
- const vfloat<N> lower_x = node->dequantizeLowerX();
- const vfloat<N> upper_x = node->dequantizeUpperX();
- const vfloat<N> lower_y = node->dequantizeLowerY();
- const vfloat<N> upper_y = node->dequantizeUpperY();
- const vfloat<N> lower_z = node->dequantizeLowerZ();
- const vfloat<N> upper_z = node->dequantizeUpperZ();
-
- const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
- const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
-
- const vfloat<K> lower_x = node->dequantizeLowerX(i,time);
- const vfloat<K> upper_x = node->dequantizeUpperX(i,time);
- const vfloat<K> lower_y = node->dequantizeLowerY(i,time);
- const vfloat<K> upper_y = node->dequantizeUpperY(i,time);
- const vfloat<K> lower_z = node->dequantizeLowerZ(i,time);
- const vfloat<K> upper_z = node->dequantizeUpperZ(i,time);
-
-#if defined(__aarch64__)
- const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
-#elif defined(__AVX2__)
- const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
-#else
- const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
- #endif
- const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
- template<int N, int K>
- __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
-
- {
- assert(movemask(node->validMask()) & ((size_t)1 << i));
-
- const vfloat<K> lower_x = node->dequantizeLowerX(i,time);
- const vfloat<K> upper_x = node->dequantizeUpperX(i,time);
- const vfloat<K> lower_y = node->dequantizeLowerY(i,time);
- const vfloat<K> upper_y = node->dequantizeUpperY(i,time);
- const vfloat<K> lower_z = node->dequantizeLowerZ(i,time);
- const vfloat<K> upper_z = node->dequantizeUpperZ(i,time);
-
- const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
- const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
- const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
- const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const float round_down = 1.0f-3.0f*float(ulp);
-
- const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
- const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
- const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
- dist = lnearP;
- return lhit;
- }
-
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Node intersectors used in hybrid traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- /*! Intersects N nodes with K rays */
- template<int N, int K, int types, bool robust>
- struct BVHNNodeIntersectorK;
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
- {
- /* vmask is both an input and an output parameter! Its initial value should be the parent node
- hit mask, which is used for correctly computing the current hit mask. The parent hit mask
- is actually required only for motion blur node intersections (because different rays may
- have different times), so for regular nodes vmask is simply overwritten. */
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
- else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
- else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
- else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
- vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
- } else /*if (unlikely(node.isOBBNodeMB()))*/ {
- assert(node.isOBBNodeMB());
- vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- }
- return true;
- }
- };
-
- template<int N, int K>
- struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
- {
- static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
- const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
- {
- if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
- vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
- } else /*if (unlikely(node.isOBBNodeMB()))*/ {
- assert(node.isOBBNodeMB());
- vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
- }
- return true;
- }
- };
-
-
- /*! Intersects N nodes with K rays */
- template<int N, int K, bool robust>
- struct BVHNQuantizedBaseNodeIntersectorK;
-
- template<int N, int K>
- struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
- {
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
- const TravRayK<K,false>& ray, vfloat<K>& dist)
- {
- return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
- }
-
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
- }
-
- };
-
- template<int N, int K>
- struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
- {
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
- const TravRayK<K,true>& ray, vfloat<K>& dist)
- {
- return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
- }
-
- static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
- const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
- {
- return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
- }
- };
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h b/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h
deleted file mode 100644
index f379b57aea..0000000000
--- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "node_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- //////////////////////////////////////////////////////////////////////////////////////
- // Ray packet structure used in stream traversal
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int K, bool robust>
- struct TravRayKStream;
-
- /* Fast variant */
- template<int K>
- struct TravRayKStream<K, false>
- {
- __forceinline TravRayKStream() {}
-
- __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
- {
- init(ray_org, ray_dir);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
- {
- rdir = rcp_safe(ray_dir);
-#if defined(__aarch64__)
- neg_org_rdir = -(ray_org * rdir);
-#else
- org_rdir = ray_org * rdir;
-#endif
- }
-
- Vec3vf<K> rdir;
-#if defined(__aarch64__)
- Vec3vf<K> neg_org_rdir;
-#else
- Vec3vf<K> org_rdir;
-#endif
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKStreamFast = TravRayKStream<K, false>;
-
- /* Robust variant */
- template<int K>
- struct TravRayKStream<K, true>
- {
- __forceinline TravRayKStream() {}
-
- __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
- {
- init(ray_org, ray_dir);
- tnear = ray_tnear;
- tfar = ray_tfar;
- }
-
- __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
- {
- rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
- org = ray_org;
- }
-
- Vec3vf<K> rdir;
- Vec3vf<K> org;
- vfloat<K> tnear;
- vfloat<K> tfar;
- };
-
- template<int K>
- using TravRayKStreamRobust = TravRayKStream<K, true>;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Fast AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, int K>
- __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
- const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
-
-#if defined (__aarch64__)
- const vfloat<Nx> rminX = madd(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k]));
- const vfloat<Nx> rminY = madd(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k]));
- const vfloat<Nx> rminZ = madd(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k]));
- const vfloat<Nx> rmaxX = madd(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k]));
- const vfloat<Nx> rmaxY = madd(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k]));
- const vfloat<Nx> rmaxZ = madd(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k]));
-#else
- const vfloat<Nx> rminX = msub(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k]));
- const vfloat<Nx> rminY = msub(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k]));
- const vfloat<Nx> rminZ = msub(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k]));
- const vfloat<Nx> rmaxX = msub(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k]));
- const vfloat<Nx> rmaxY = msub(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k]));
- const vfloat<Nx> rmaxZ = msub(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k]));
-#endif
- const vfloat<Nx> rmin = maxi(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k]));
- const vfloat<Nx> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k]));
-
- const vbool<Nx> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
- }
-
- template<int N, int K>
- __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
- const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
- {
- char* ptr = (char*)&node->lower_x + i*sizeof(float);
- const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
- const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
- const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
- const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
- const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
- const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
-
-#if defined (__aarch64__)
- const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z);
- const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x);
- const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y);
- const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z);
-#else
- const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
- const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
- const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
- const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
-#endif
-
- const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
- const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
-
- const vbool<K> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit);
- }
-
- //////////////////////////////////////////////////////////////////////////////////////
- // Robust AABBNode intersection
- //////////////////////////////////////////////////////////////////////////////////////
-
- template<int N, int Nx, int K>
- __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
- const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
- {
- const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
- const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
- const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
- const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
- const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
- const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
-
- const vfloat<Nx> rminX = (bminX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]);
- const vfloat<Nx> rminY = (bminY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]);
- const vfloat<Nx> rminZ = (bminZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]);
- const vfloat<Nx> rmaxX = (bmaxX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]);
- const vfloat<Nx> rmaxY = (bmaxY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]);
- const vfloat<Nx> rmaxZ = (bmaxZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]);
- const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
- const vfloat<Nx> rmin = max(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k]));
- const vfloat<Nx> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k]));
-
- const vbool<Nx> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
- }
-
- template<int N, int K>
- __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
- const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
- {
- char *ptr = (char*)&node->lower_x + i*sizeof(float);
- const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
- const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
- const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
- const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
- const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
- const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
-
- const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
- const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
- const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
- const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
- const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
- const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
-
- const float round_up = 1.0f+3.0f*float(ulp);
- const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
- const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
-
- const vbool<K> vmask_first_hit = rmin <= rmax;
-
- return movemask(vmask_first_hit);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accel.h b/thirdparty/embree-aarch64/kernels/common/accel.h
deleted file mode 100644
index c038d3cf21..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accel.h
+++ /dev/null
@@ -1,556 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "ray.h"
-#include "point_query.h"
-#include "context.h"
-
-namespace embree
-{
- class Scene;
-
- /*! Base class for the acceleration structure data. */
- class AccelData : public RefCount
- {
- ALIGNED_CLASS_(16);
- public:
- enum Type { TY_UNKNOWN = 0, TY_ACCELN = 1, TY_ACCEL_INSTANCE = 2, TY_BVH4 = 3, TY_BVH8 = 4 };
-
- public:
- AccelData (const Type type)
- : bounds(empty), type(type) {}
-
- /*! notifies the acceleration structure about the deletion of some geometry */
- virtual void deleteGeometry(size_t geomID) {};
-
- /*! clears the acceleration structure data */
- virtual void clear() = 0;
-
- /*! returns normal bounds */
- __forceinline BBox3fa getBounds() const {
- return bounds.bounds();
- }
-
- /*! returns bounds for some time */
- __forceinline BBox3fa getBounds(float t) const {
- return bounds.interpolate(t);
- }
-
- /*! returns linear bounds */
- __forceinline LBBox3fa getLinearBounds() const {
- return bounds;
- }
-
- /*! checks if acceleration structure is empty */
- __forceinline bool isEmpty() const {
- return bounds.bounds0.lower.x == float(pos_inf);
- }
-
- public:
- LBBox3fa bounds; // linear bounds
- Type type;
- };
-
- /*! Base class for all intersectable and buildable acceleration structures. */
- class Accel : public AccelData
- {
- ALIGNED_CLASS_(16);
- public:
-
- struct Intersectors;
-
- /*! Type of collide function */
- typedef void (*CollideFunc)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr);
-
- /*! Type of point query function */
- typedef bool(*PointQueryFunc)(Intersectors* This, /*!< this pointer to accel */
- PointQuery* query, /*!< point query for lookup */
- PointQueryContext* context); /*!< point query context */
-
- /*! Type of intersect function pointer for single rays. */
- typedef void (*IntersectFunc)(Intersectors* This, /*!< this pointer to accel */
- RTCRayHit& ray, /*!< ray to intersect */
- IntersectContext* context);
-
- /*! Type of intersect function pointer for ray packets of size 4. */
- typedef void (*IntersectFunc4)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit4& ray, /*!< ray packet to intersect */
- IntersectContext* context);
-
- /*! Type of intersect function pointer for ray packets of size 8. */
- typedef void (*IntersectFunc8)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit8& ray, /*!< ray packet to intersect */
- IntersectContext* context);
-
- /*! Type of intersect function pointer for ray packets of size 16. */
- typedef void (*IntersectFunc16)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit16& ray, /*!< ray packet to intersect */
- IntersectContext* context);
-
- /*! Type of intersect function pointer for ray packets of size N. */
- typedef void (*IntersectFuncN)(Intersectors* This, /*!< this pointer to accel */
- RTCRayHitN** ray, /*!< ray stream to intersect */
- const size_t N, /*!< number of rays in stream */
- IntersectContext* context /*!< layout flags */);
-
-
- /*! Type of occlusion function pointer for single rays. */
- typedef void (*OccludedFunc) (Intersectors* This, /*!< this pointer to accel */
- RTCRay& ray, /*!< ray to test occlusion */
- IntersectContext* context);
-
- /*! Type of occlusion function pointer for ray packets of size 4. */
- typedef void (*OccludedFunc4) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay4& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context);
-
- /*! Type of occlusion function pointer for ray packets of size 8. */
- typedef void (*OccludedFunc8) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay8& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context);
-
- /*! Type of occlusion function pointer for ray packets of size 16. */
- typedef void (*OccludedFunc16) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay16& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context);
-
- /*! Type of intersect function pointer for ray packets of size N. */
- typedef void (*OccludedFuncN)(Intersectors* This, /*!< this pointer to accel */
- RTCRayN** ray, /*!< ray stream to test occlusion */
- const size_t N, /*!< number of rays in stream */
- IntersectContext* context /*!< layout flags */);
- typedef void (*ErrorFunc) ();
-
- struct Collider
- {
- Collider (ErrorFunc error = nullptr)
- : collide((CollideFunc)error), name(nullptr) {}
-
- Collider (CollideFunc collide, const char* name)
- : collide(collide), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- CollideFunc collide;
- const char* name;
- };
-
- struct Intersector1
- {
- Intersector1 (ErrorFunc error = nullptr)
- : intersect((IntersectFunc)error), occluded((OccludedFunc)error), name(nullptr) {}
-
- Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name)
- : intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {}
-
- Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name)
- : intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFunc intersect;
- OccludedFunc occluded;
- PointQueryFunc pointQuery;
- const char* name;
- };
-
- struct Intersector4
- {
- Intersector4 (ErrorFunc error = nullptr)
- : intersect((IntersectFunc4)error), occluded((OccludedFunc4)error), name(nullptr) {}
-
- Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFunc4 intersect;
- OccludedFunc4 occluded;
- const char* name;
- };
-
- struct Intersector8
- {
- Intersector8 (ErrorFunc error = nullptr)
- : intersect((IntersectFunc8)error), occluded((OccludedFunc8)error), name(nullptr) {}
-
- Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFunc8 intersect;
- OccludedFunc8 occluded;
- const char* name;
- };
-
- struct Intersector16
- {
- Intersector16 (ErrorFunc error = nullptr)
- : intersect((IntersectFunc16)error), occluded((OccludedFunc16)error), name(nullptr) {}
-
- Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFunc16 intersect;
- OccludedFunc16 occluded;
- const char* name;
- };
-
- struct IntersectorN
- {
- IntersectorN (ErrorFunc error = nullptr)
- : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {}
-
- IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFuncN intersect;
- OccludedFuncN occluded;
- const char* name;
- };
-
- struct Intersectors
- {
- Intersectors()
- : ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr), intersectorN(nullptr) {}
-
- Intersectors (ErrorFunc error)
- : ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error), intersectorN(error) {}
-
- void print(size_t ident)
- {
- if (collider.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "collider = " << collider.name << std::endl;
- }
- if (intersector1.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "intersector1 = " << intersector1.name << std::endl;
- }
- if (intersector4.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "intersector4 = " << intersector4.name << std::endl;
- }
- if (intersector8.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "intersector8 = " << intersector8.name << std::endl;
- }
- if (intersector16.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "intersector16 = " << intersector16.name << std::endl;
- }
- if (intersectorN.name) {
- for (size_t i=0; i<ident; i++) std::cout << " ";
- std::cout << "intersectorN = " << intersectorN.name << std::endl;
- }
- }
-
- void select(bool filter)
- {
- if (intersector4_filter) {
- if (filter) intersector4 = intersector4_filter;
- else intersector4 = intersector4_nofilter;
- }
- if (intersector8_filter) {
- if (filter) intersector8 = intersector8_filter;
- else intersector8 = intersector8_nofilter;
- }
- if (intersector16_filter) {
- if (filter) intersector16 = intersector16_filter;
- else intersector16 = intersector16_nofilter;
- }
- if (intersectorN_filter) {
- if (filter) intersectorN = intersectorN_filter;
- else intersectorN = intersectorN_nofilter;
- }
- }
-
- __forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) {
- assert(intersector1.pointQuery);
- return intersector1.pointQuery(this,query,context);
- }
-
- /*! collides two scenes */
- __forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) {
- assert(collider.collide);
- collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr);
- }
-
- /*! Intersects a single ray with the scene. */
- __forceinline void intersect (RTCRayHit& ray, IntersectContext* context) {
- assert(intersector1.intersect);
- intersector1.intersect(this,ray,context);
- }
-
- /*! Intersects a packet of 4 rays with the scene. */
- __forceinline void intersect4 (const void* valid, RTCRayHit4& ray, IntersectContext* context) {
- assert(intersector4.intersect);
- intersector4.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a packet of 8 rays with the scene. */
- __forceinline void intersect8 (const void* valid, RTCRayHit8& ray, IntersectContext* context) {
- assert(intersector8.intersect);
- intersector8.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a packet of 16 rays with the scene. */
- __forceinline void intersect16 (const void* valid, RTCRayHit16& ray, IntersectContext* context) {
- assert(intersector16.intersect);
- intersector16.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a stream of N rays in SOA layout with the scene. */
- __forceinline void intersectN (RTCRayHitN** rayN, const size_t N, IntersectContext* context)
- {
- assert(intersectorN.intersect);
- intersectorN.intersect(this,rayN,N,context);
- }
-
-#if defined(__SSE__) || defined(__ARM_NEON)
- __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) {
- const vint<4> mask = valid.mask32();
- intersect4(&mask,(RTCRayHit4&)ray,context);
- }
-#endif
-#if defined(__AVX__)
- __forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, IntersectContext* context) {
- const vint<8> mask = valid.mask32();
- intersect8(&mask,(RTCRayHit8&)ray,context);
- }
-#endif
-#if defined(__AVX512F__)
- __forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, IntersectContext* context) {
- const vint<16> mask = valid.mask32();
- intersect16(&mask,(RTCRayHit16&)ray,context);
- }
-#endif
-
- template<int K>
- __forceinline void intersectN (RayHitK<K>** rayN, const size_t N, IntersectContext* context)
- {
- intersectN((RTCRayHitN**)rayN,N,context);
- }
-
- /*! Tests if single ray is occluded by the scene. */
- __forceinline void occluded (RTCRay& ray, IntersectContext* context) {
- assert(intersector1.occluded);
- intersector1.occluded(this,ray,context);
- }
-
- /*! Tests if a packet of 4 rays is occluded by the scene. */
- __forceinline void occluded4 (const void* valid, RTCRay4& ray, IntersectContext* context) {
- assert(intersector4.occluded);
- intersector4.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a packet of 8 rays is occluded by the scene. */
- __forceinline void occluded8 (const void* valid, RTCRay8& ray, IntersectContext* context) {
- assert(intersector8.occluded);
- intersector8.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a packet of 16 rays is occluded by the scene. */
- __forceinline void occluded16 (const void* valid, RTCRay16& ray, IntersectContext* context) {
- assert(intersector16.occluded);
- intersector16.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a stream of N rays in SOA layout is occluded by the scene. */
- __forceinline void occludedN (RTCRayN** rayN, const size_t N, IntersectContext* context)
- {
- assert(intersectorN.occluded);
- intersectorN.occluded(this,rayN,N,context);
- }
-
-#if defined(__SSE__) || defined(__ARM_NEON)
- __forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) {
- const vint<4> mask = valid.mask32();
- occluded4(&mask,(RTCRay4&)ray,context);
- }
-#endif
-#if defined(__AVX__)
- __forceinline void occluded(const vbool8& valid, RayK<8>& ray, IntersectContext* context) {
- const vint<8> mask = valid.mask32();
- occluded8(&mask,(RTCRay8&)ray,context);
- }
-#endif
-#if defined(__AVX512F__)
- __forceinline void occluded(const vbool16& valid, RayK<16>& ray, IntersectContext* context) {
- const vint<16> mask = valid.mask32();
- occluded16(&mask,(RTCRay16&)ray,context);
- }
-#endif
-
- template<int K>
- __forceinline void occludedN (RayK<K>** rayN, const size_t N, IntersectContext* context)
- {
- occludedN((RTCRayN**)rayN,N,context);
- }
-
- /*! Tests if single ray is occluded by the scene. */
- __forceinline void intersect(RTCRay& ray, IntersectContext* context) {
- occluded(ray, context);
- }
-
- /*! Tests if a packet of K rays is occluded by the scene. */
- template<int K>
- __forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, IntersectContext* context) {
- occluded(valid, ray, context);
- }
-
- /*! Tests if a packet of N rays in SOA layout is occluded by the scene. */
- template<int K>
- __forceinline void intersectN(RayK<K>** rayN, const size_t N, IntersectContext* context) {
- occludedN(rayN, N, context);
- }
-
- public:
- AccelData* ptr;
- void* leafIntersector;
- Collider collider;
- Intersector1 intersector1;
- Intersector4 intersector4;
- Intersector4 intersector4_filter;
- Intersector4 intersector4_nofilter;
- Intersector8 intersector8;
- Intersector8 intersector8_filter;
- Intersector8 intersector8_nofilter;
- Intersector16 intersector16;
- Intersector16 intersector16_filter;
- Intersector16 intersector16_nofilter;
- IntersectorN intersectorN;
- IntersectorN intersectorN_filter;
- IntersectorN intersectorN_nofilter;
- };
-
- public:
-
- /*! Construction */
- Accel (const AccelData::Type type)
- : AccelData(type) {}
-
- /*! Construction */
- Accel (const AccelData::Type type, const Intersectors& intersectors)
- : AccelData(type), intersectors(intersectors) {}
-
- /*! Virtual destructor */
- virtual ~Accel() {}
-
- /*! makes the acceleration structure immutable */
- virtual void immutable () {}
-
- /*! build acceleration structure */
- virtual void build () = 0;
-
- public:
- Intersectors intersectors;
- };
-
-#define DEFINE_COLLIDER(symbol,collider) \
- Accel::Collider symbol() { \
- return Accel::Collider((Accel::CollideFunc)collider::collide, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
-#define DEFINE_INTERSECTOR1(symbol,intersector) \
- Accel::Intersector1 symbol() { \
- return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \
- (Accel::OccludedFunc )intersector::occluded, \
- (Accel::PointQueryFunc)intersector::pointQuery,\
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
-#define DEFINE_INTERSECTOR4(symbol,intersector) \
- Accel::Intersector4 symbol() { \
- return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \
- (Accel::OccludedFunc4)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
-#define DEFINE_INTERSECTOR8(symbol,intersector) \
- Accel::Intersector8 symbol() { \
- return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \
- (Accel::OccludedFunc8)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
-#define DEFINE_INTERSECTOR16(symbol,intersector) \
- Accel::Intersector16 symbol() { \
- return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \
- (Accel::OccludedFunc16)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
-#define DEFINE_INTERSECTORN(symbol,intersector) \
- Accel::IntersectorN symbol() { \
- return Accel::IntersectorN((Accel::IntersectFuncN)intersector::intersect, \
- (Accel::OccludedFuncN)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
- /* ray stream filter interface */
- typedef void (*intersectStreamAOS_func)(Scene* scene, RTCRayHit* _rayN, const size_t N, const size_t stride, IntersectContext* context);
- typedef void (*intersectStreamAOP_func)(Scene* scene, RTCRayHit** _rayN, const size_t N, IntersectContext* context);
- typedef void (*intersectStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context);
- typedef void (*intersectStreamSOP_func)(Scene* scene, const RTCRayHitNp* rayN, const size_t N, IntersectContext* context);
-
- typedef void (*occludedStreamAOS_func)(Scene* scene, RTCRay* _rayN, const size_t N, const size_t stride, IntersectContext* context);
- typedef void (*occludedStreamAOP_func)(Scene* scene, RTCRay** _rayN, const size_t N, IntersectContext* context);
- typedef void (*occludedStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context);
- typedef void (*occludedStreamSOP_func)(Scene* scene, const RTCRayNp* rayN, const size_t N, IntersectContext* context);
-
- struct RayStreamFilterFuncs
- {
- RayStreamFilterFuncs()
- : intersectAOS(nullptr), intersectAOP(nullptr), intersectSOA(nullptr), intersectSOP(nullptr),
- occludedAOS(nullptr), occludedAOP(nullptr), occludedSOA(nullptr), occludedSOP(nullptr) {}
-
- RayStreamFilterFuncs(void (*ptr) ())
- : intersectAOS((intersectStreamAOS_func) ptr), intersectAOP((intersectStreamAOP_func) ptr), intersectSOA((intersectStreamSOA_func) ptr), intersectSOP((intersectStreamSOP_func) ptr),
- occludedAOS((occludedStreamAOS_func) ptr), occludedAOP((occludedStreamAOP_func) ptr), occludedSOA((occludedStreamSOA_func) ptr), occludedSOP((occludedStreamSOP_func) ptr) {}
-
- RayStreamFilterFuncs(intersectStreamAOS_func intersectAOS, intersectStreamAOP_func intersectAOP, intersectStreamSOA_func intersectSOA, intersectStreamSOP_func intersectSOP,
- occludedStreamAOS_func occludedAOS, occludedStreamAOP_func occludedAOP, occludedStreamSOA_func occludedSOA, occludedStreamSOP_func occludedSOP)
- : intersectAOS(intersectAOS), intersectAOP(intersectAOP), intersectSOA(intersectSOA), intersectSOP(intersectSOP),
- occludedAOS(occludedAOS), occludedAOP(occludedAOP), occludedSOA(occludedSOA), occludedSOP(occludedSOP) {}
-
- public:
- intersectStreamAOS_func intersectAOS;
- intersectStreamAOP_func intersectAOP;
- intersectStreamSOA_func intersectSOA;
- intersectStreamSOP_func intersectSOP;
-
- occludedStreamAOS_func occludedAOS;
- occludedStreamAOP_func occludedAOP;
- occludedStreamSOA_func occludedSOA;
- occludedStreamSOP_func occludedSOP;
- };
-
- typedef RayStreamFilterFuncs (*RayStreamFilterFuncsType)();
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelinstance.h b/thirdparty/embree-aarch64/kernels/common/accelinstance.h
deleted file mode 100644
index d74b96df3f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelinstance.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accel.h"
-#include "builder.h"
-
-namespace embree
-{
- class AccelInstance : public Accel
- {
- public:
- AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors)
- : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {}
-
- void immutable () {
- builder.reset(nullptr);
- }
-
- public:
- void build () {
- if (builder) builder->build();
- bounds = accel->bounds;
- }
-
- void deleteGeometry(size_t geomID) {
- if (accel ) accel->deleteGeometry(geomID);
- if (builder) builder->deleteGeometry(geomID);
- }
-
- void clear() {
- if (accel) accel->clear();
- if (builder) builder->clear();
- }
-
- private:
- std::unique_ptr<AccelData> accel;
- std::unique_ptr<Builder> builder;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.cpp b/thirdparty/embree-aarch64/kernels/common/acceln.cpp
deleted file mode 100644
index aadb4a64ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/acceln.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "acceln.h"
-#include "ray.h"
-#include "../../include/embree3/rtcore_ray.h"
-#include "../../common/algorithms/parallel_for.h"
-
-namespace embree
-{
- AccelN::AccelN()
- : Accel(AccelData::TY_ACCELN), accels() {}
-
- AccelN::~AccelN()
- {
- for (size_t i=0; i<accels.size(); i++)
- delete accels[i];
- }
-
- void AccelN::accels_add(Accel* accel)
- {
- assert(accel);
- accels.push_back(accel);
- }
-
- void AccelN::accels_init()
- {
- for (size_t i=0; i<accels.size(); i++)
- delete accels[i];
-
- accels.clear();
- }
-
- bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context)
- {
- bool changed = false;
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- changed |= This->accels[i]->intersectors.pointQuery(query,context);
- return changed;
- }
-
- void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.intersect(ray,context);
- }
-
- void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.intersect4(valid,ray,context);
- }
-
- void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.intersect8(valid,ray,context);
- }
-
- void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.intersect16(valid,ray,context);
- }
-
- void AccelN::intersectN (Accel::Intersectors* This_in, RTCRayHitN** ray, const size_t N, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.intersectN(ray,N,context);
- }
-
- void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded(ray,context);
- if (ray.tfar < 0.0f) break;
- }
- }
-
- void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded4(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON)
- vbool4 valid0 = asBool(((vint4*)valid)[0]);
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
- if (unlikely(none(valid0 & hit0))) break;
-#endif
- }
- }
-
- void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded8(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
- vbool4 valid0 = asBool(((vint4*)valid)[0]);
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
- vbool4 valid1 = asBool(((vint4*)valid)[1]);
- vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
- if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break;
-#endif
- }
- }
-
- void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded16(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
- vbool4 valid0 = asBool(((vint4*)valid)[0]);
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
- vbool4 valid1 = asBool(((vint4*)valid)[1]);
- vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
- vbool4 valid2 = asBool(((vint4*)valid)[2]);
- vbool4 hit2 = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero);
- vbool4 valid3 = asBool(((vint4*)valid)[3]);
- vbool4 hit3 = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero);
- if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break;
-#endif
- }
- }
-
- void AccelN::occludedN (Accel::Intersectors* This_in, RTCRayN** ray, const size_t N, IntersectContext* context)
- {
- AccelN* This = (AccelN*)This_in->ptr;
- size_t M = N;
- for (size_t i=0; i<This->accels.size(); i++)
- if (!This->accels[i]->isEmpty())
- This->accels[i]->intersectors.occludedN(ray,M,context);
- }
-
- void AccelN::accels_print(size_t ident)
- {
- for (size_t i=0; i<accels.size(); i++)
- {
- for (size_t j=0; j<ident; j++) std::cout << " ";
- std::cout << "accels[" << i << "]" << std::endl;
- accels[i]->intersectors.print(ident+2);
- }
- }
-
- void AccelN::accels_immutable()
- {
- for (size_t i=0; i<accels.size(); i++)
- accels[i]->immutable();
- }
-
- void AccelN::accels_build ()
- {
- /* reduce memory consumption */
- accels.shrink_to_fit();
-
- /* build all acceleration structures in parallel */
- parallel_for (accels.size(), [&] (size_t i) {
- accels[i]->build();
- });
-
- /* create list of non-empty acceleration structures */
- bool valid1 = true;
- bool valid4 = true;
- bool valid8 = true;
- bool valid16 = true;
- for (size_t i=0; i<accels.size(); i++) {
- valid1 &= (bool) accels[i]->intersectors.intersector1;
- valid4 &= (bool) accels[i]->intersectors.intersector4;
- valid8 &= (bool) accels[i]->intersectors.intersector8;
- valid16 &= (bool) accels[i]->intersectors.intersector16;
- }
-
- if (accels.size() == 1) {
- type = accels[0]->type; // FIXME: should just assign entire Accel
- bounds = accels[0]->bounds;
- intersectors = accels[0]->intersectors;
- }
- else
- {
- type = AccelData::TY_ACCELN;
- intersectors.ptr = this;
- intersectors.intersector1 = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr);
- intersectors.intersector4 = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr);
- intersectors.intersector8 = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr);
- intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr);
- intersectors.intersectorN = IntersectorN(&intersectN,&occludedN,"AccelN::intersectorN");
-
- /*! calculate bounds */
- bounds = empty;
- for (size_t i=0; i<accels.size(); i++)
- bounds.extend(accels[i]->bounds);
- }
- }
-
- void AccelN::accels_select(bool filter)
- {
- for (size_t i=0; i<accels.size(); i++)
- accels[i]->intersectors.select(filter);
- }
-
- void AccelN::accels_deleteGeometry(size_t geomID)
- {
- for (size_t i=0; i<accels.size(); i++)
- accels[i]->deleteGeometry(geomID);
- }
-
- void AccelN::accels_clear()
- {
- for (size_t i=0; i<accels.size(); i++) {
- accels[i]->clear();
- }
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.h b/thirdparty/embree-aarch64/kernels/common/acceln.h
deleted file mode 100644
index 2edd98f647..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/acceln.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accel.h"
-
-namespace embree
-{
- /*! merges N acceleration structures together, by processing them in order */
- class AccelN : public Accel
- {
- public:
- AccelN ();
- ~AccelN();
-
- public:
- void accels_add(Accel* accel);
- void accels_init();
-
- public:
- static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
-
- public:
- static void intersect (Accel::Intersectors* This, RTCRayHit& ray, IntersectContext* context);
- static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, IntersectContext* context);
- static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, IntersectContext* context);
- static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, IntersectContext* context);
- static void intersectN (Accel::Intersectors* This, RTCRayHitN** ray, const size_t N, IntersectContext* context);
-
- public:
- static void occluded (Accel::Intersectors* This, RTCRay& ray, IntersectContext* context);
- static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, IntersectContext* context);
- static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, IntersectContext* context);
- static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, IntersectContext* context);
- static void occludedN (Accel::Intersectors* This, RTCRayN** ray, const size_t N, IntersectContext* context);
-
- public:
- void accels_print(size_t ident);
- void accels_immutable();
- void accels_build ();
- void accels_select(bool filter);
- void accels_deleteGeometry(size_t geomID);
- void accels_clear ();
-
- public:
- std::vector<Accel*> accels;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.cpp b/thirdparty/embree-aarch64/kernels/common/accelset.cpp
deleted file mode 100644
index 79be1c4301..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelset.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "accelset.h"
-#include "scene.h"
-
-namespace embree
-{
- AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps)
- : Geometry(device,gtype,(unsigned int)numItems,(unsigned int)numTimeSteps), boundsFunc(nullptr) {}
-
- AccelSet::IntersectorN::IntersectorN (ErrorFunc error)
- : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {}
-
- AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.h b/thirdparty/embree-aarch64/kernels/common/accelset.h
deleted file mode 100644
index 3774b2accb..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelset.h
+++ /dev/null
@@ -1,248 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "builder.h"
-#include "geometry.h"
-#include "ray.h"
-#include "hit.h"
-
-namespace embree
-{
- struct IntersectFunctionNArguments;
- struct OccludedFunctionNArguments;
-
- typedef void (*ReportIntersectionFunc) (IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
- typedef void (*ReportOcclusionFunc) (OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
-
- struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
- {
- IntersectContext* internal_context;
- Geometry* geometry;
- ReportIntersectionFunc report;
- };
-
- struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
- {
- IntersectContext* internal_context;
- Geometry* geometry;
- ReportOcclusionFunc report;
- };
-
- /*! Base class for set of acceleration structures. */
- class AccelSet : public Geometry
- {
- public:
- typedef RTCIntersectFunctionN IntersectFuncN;
- typedef RTCOccludedFunctionN OccludedFuncN;
- typedef void (*ErrorFunc) ();
-
- struct IntersectorN
- {
- IntersectorN (ErrorFunc error = nullptr) ;
- IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name);
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFuncN intersect;
- OccludedFuncN occluded;
- const char* name;
- };
-
- public:
-
- /*! construction */
- AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps);
-
- /*! makes the acceleration structure immutable */
- virtual void immutable () {}
-
- /*! build accel */
- virtual void build () = 0;
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- if (!isvalid_non_empty(bounds(i,itime))) return false;
-
- return true;
- }
-
- /*! Calculates the bounds of an item */
- __forceinline BBox3fa bounds(size_t i, size_t itime = 0) const
- {
- BBox3fa box;
- assert(i < size());
- RTCBoundsFunctionArguments args;
- args.geometryUserPtr = userPtr;
- args.primID = (unsigned int)i;
- args.timeStep = (unsigned int)itime;
- args.bounds_o = (RTCBounds*)&box;
- boundsFunc(&args);
- return box;
- }
-
- /*! calculates the linear bounds of the i'th item at the itime'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const
- {
- BBox3fa box[2];
- assert(i < size());
- RTCBoundsFunctionArguments args;
- args.geometryUserPtr = userPtr;
- args.primID = (unsigned int)i;
- args.timeStep = (unsigned int)(itime+0);
- args.bounds_o = (RTCBounds*)&box[0];
- boundsFunc(&args);
- args.timeStep = (unsigned int)(itime+1);
- args.bounds_o = (RTCBounds*)&box[1];
- boundsFunc(&args);
- return LBBox3fa(box[0],box[1]);
- }
-
- /*! calculates the build bounds of the i'th item, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const BBox3fa b = bounds(i);
- if (bbox) *bbox = b;
- return isvalid_non_empty(b);
- }
-
- /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const LBBox3fa bounds = linearBounds(i,itime);
- bbox = bounds.bounds0; // use bounding box of first timestep to build BVH
- return isvalid_non_empty(bounds);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return numPrimitives;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return numPrimitives != otherVersion;
- }
-
- public:
-
- /*! Intersects a single ray with the scene. */
- __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.intersect);
-
- int mask = -1;
- IntersectFunctionNArguments args;
- args.valid = &mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.rayhit = (RTCRayHitN*)&ray;
- args.N = 1;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.intersect(&args);
- }
-
- /*! Tests if single ray is occluded by the scene. */
- __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.occluded);
-
- int mask = -1;
- OccludedFunctionNArguments args;
- args.valid = &mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.N = 1;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.occluded(&args);
- }
-
- /*! Intersects a packet of K rays with the scene. */
- template<int K>
- __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.intersect);
-
- vint<K> mask = valid.mask32();
- IntersectFunctionNArguments args;
- args.valid = (int*)&mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.rayhit = (RTCRayHitN*)&ray;
- args.N = K;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.intersect(&args);
- }
-
- /*! Tests if a packet of K rays is occluded by the scene. */
- template<int K>
- __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.occluded);
-
- vint<K> mask = valid.mask32();
- OccludedFunctionNArguments args;
- args.valid = (int*)&mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.N = K;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.occluded(&args);
- }
-
- public:
- RTCBoundsFunction boundsFunc;
- IntersectorN intersectorN;
- };
-
-#define DEFINE_SET_INTERSECTORN(symbol,intersector) \
- AccelSet::IntersectorN symbol() { \
- return AccelSet::IntersectorN(intersector::intersect, \
- intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.cpp b/thirdparty/embree-aarch64/kernels/common/alloc.cpp
deleted file mode 100644
index 6fa406f03a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/alloc.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "alloc.h"
-#include "../../common/sys/thread.h"
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include "../../common/sys/barrier.h"
-#endif
-
-namespace embree
-{
- __thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr;
- SpinLock FastAllocator::s_thread_local_allocators_lock;
- std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators;
-
- struct fast_allocator_regression_test : public RegressionTest
- {
- BarrierSys barrier;
- std::atomic<size_t> numFailed;
- std::unique_ptr<FastAllocator> alloc;
-
- fast_allocator_regression_test()
- : RegressionTest("fast_allocator_regression_test"), numFailed(0)
- {
- registerRegressionTest(this);
- }
-
- static void thread_alloc(fast_allocator_regression_test* This)
- {
- FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator();
-
- size_t* ptrs[1000];
- for (size_t j=0; j<1000; j++)
- {
- This->barrier.wait();
- for (size_t i=0; i<1000; i++) {
- ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32));
- *ptrs[i] = size_t(threadalloc.talloc0) + i;
- }
- for (size_t i=0; i<1000; i++) {
- if (*ptrs[i] != size_t(threadalloc.talloc0) + i)
- This->numFailed++;
- }
- This->barrier.wait();
- }
- }
-
- bool run ()
- {
- alloc = make_unique(new FastAllocator(nullptr,false));
- numFailed.store(0);
-
- size_t numThreads = getNumberOfLogicalThreads();
- barrier.init(numThreads+1);
-
- /* create threads */
- std::vector<thread_t> threads;
- for (size_t i=0; i<numThreads; i++)
- threads.push_back(createThread((thread_func)thread_alloc,this));
-
- /* run test */
- for (size_t i=0; i<1000; i++)
- {
- alloc->reset();
- barrier.wait();
- barrier.wait();
- }
-
- /* destroy threads */
- for (size_t i=0; i<numThreads; i++)
- join(threads[i]);
-
- alloc = nullptr;
-
- return numFailed == 0;
- }
- };
-
- fast_allocator_regression_test fast_allocator_regression;
-}
-
-
diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.h b/thirdparty/embree-aarch64/kernels/common/alloc.h
deleted file mode 100644
index 488fa707ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/alloc.h
+++ /dev/null
@@ -1,1006 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "primref.h"
-
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include <mutex>
-#endif
-
-namespace embree
-{
- class FastAllocator
- {
- /*! maximum supported alignment */
- static const size_t maxAlignment = 64;
-
- /*! maximum allocation size */
-
- /* default settings */
- //static const size_t defaultBlockSize = 4096;
-#define maxAllocationSize size_t(2*1024*1024-maxAlignment)
-
- static const size_t MAX_THREAD_USED_BLOCK_SLOTS = 8;
-
- public:
-
- struct ThreadLocal2;
- enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE };
-
- /*! Per thread structure holding the current memory block. */
- struct __aligned(64) ThreadLocal
- {
- ALIGNED_CLASS_(64);
- public:
-
- /*! Constructor for usage with ThreadLocalData */
- __forceinline ThreadLocal (ThreadLocal2* parent)
- : parent(parent), ptr(nullptr), cur(0), end(0), allocBlockSize(0), bytesUsed(0), bytesWasted(0) {}
-
- /*! initialize allocator */
- void init(FastAllocator* alloc)
- {
- ptr = nullptr;
- cur = end = 0;
- bytesUsed = 0;
- bytesWasted = 0;
- allocBlockSize = 0;
- if (alloc) allocBlockSize = alloc->defaultBlockSize;
- }
-
- /* Allocate aligned memory from the threads memory block. */
- __forceinline void* malloc(FastAllocator* alloc, size_t bytes, size_t align = 16)
- {
- /* bind the thread local allocator to the proper FastAllocator*/
- parent->bind(alloc);
-
- assert(align <= maxAlignment);
- bytesUsed += bytes;
-
- /* try to allocate in local block */
- size_t ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* if allocation is too large allocate with parent allocator */
- if (4*bytes > allocBlockSize) {
- return alloc->malloc(bytes,maxAlignment,false);
- }
-
- /* get new partial block if allocation failed */
- size_t blockSize = allocBlockSize;
- ptr = (char*) alloc->malloc(blockSize,maxAlignment,true);
- bytesWasted += end-cur;
- cur = 0; end = blockSize;
-
- /* retry allocation */
- ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* get new full block if allocation failed */
- blockSize = allocBlockSize;
- ptr = (char*) alloc->malloc(blockSize,maxAlignment,false);
- bytesWasted += end-cur;
- cur = 0; end = blockSize;
-
- /* retry allocation */
- ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* should never happen as large allocations get handled specially above */
- assert(false);
- return nullptr;
- }
-
-
- /*! returns amount of used bytes */
- __forceinline size_t getUsedBytes() const { return bytesUsed; }
-
- /*! returns amount of free bytes */
- __forceinline size_t getFreeBytes() const { return end-cur; }
-
- /*! returns amount of wasted bytes */
- __forceinline size_t getWastedBytes() const { return bytesWasted; }
-
- private:
- ThreadLocal2* parent;
- char* ptr; //!< pointer to memory block
- size_t cur; //!< current location of the allocator
- size_t end; //!< end of the memory block
- size_t allocBlockSize; //!< block size for allocations
- size_t bytesUsed; //!< number of total bytes allocated
- size_t bytesWasted; //!< number of bytes wasted
- };
-
- /*! Two thread local structures. */
- struct __aligned(64) ThreadLocal2
- {
- ALIGNED_CLASS_(64);
- public:
-
- __forceinline ThreadLocal2()
- : alloc(nullptr), alloc0(this), alloc1(this) {}
-
- /*! bind to fast allocator */
- __forceinline void bind(FastAllocator* alloc_i)
- {
- assert(alloc_i);
- if (alloc.load() == alloc_i) return;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- //if (alloc.load() == alloc_i) return; // not required as only one thread calls bind
- if (alloc.load()) {
- alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
- alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
- alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes();
- }
- alloc0.init(alloc_i);
- alloc1.init(alloc_i);
- alloc.store(alloc_i);
- alloc_i->join(this);
- }
-
- /*! unbind to fast allocator */
- void unbind(FastAllocator* alloc_i)
- {
- assert(alloc_i);
- if (alloc.load() != alloc_i) return;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- if (alloc.load() != alloc_i) return; // required as a different thread calls unbind
- alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
- alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
- alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes();
- alloc0.init(nullptr);
- alloc1.init(nullptr);
- alloc.store(nullptr);
- }
-
- public:
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex mutex;
-#else
- SpinLock mutex; //!< required as unbind is called from other threads
-#endif
- std::atomic<FastAllocator*> alloc; //!< parent allocator
- ThreadLocal alloc0;
- ThreadLocal alloc1;
- };
-
- FastAllocator (Device* device, bool osAllocation)
- : device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0),
- growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC),
- primrefarray(device,0)
- {
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- assert(!slotMutex[i].isLocked());
- }
- }
-
- ~FastAllocator () {
- clear();
- }
-
- /*! returns the device attached to this allocator */
- Device* getDevice() {
- return device;
- }
-
- void share(mvector<PrimRef>& primrefarray_i) {
- primrefarray = std::move(primrefarray_i);
- }
-
- void unshare(mvector<PrimRef>& primrefarray_o)
- {
- reset(); // this removes blocks that are allocated inside the shared primref array
- primrefarray_o = std::move(primrefarray);
- }
-
- /*! returns first fast thread local allocator */
- __forceinline ThreadLocal* _threadLocal() {
- return &threadLocal2()->alloc0;
- }
-
- void setOSallocation(bool flag)
- {
- atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC;
- }
-
- private:
-
- /*! returns both fast thread local allocators */
- __forceinline ThreadLocal2* threadLocal2()
- {
- ThreadLocal2* alloc = thread_local_allocator2;
- if (alloc == nullptr) {
- thread_local_allocator2 = alloc = new ThreadLocal2;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(s_thread_local_allocators_lock);
-#else
- Lock<SpinLock> lock(s_thread_local_allocators_lock);
-#endif
- s_thread_local_allocators.push_back(make_unique(alloc));
- }
- return alloc;
- }
-
- public:
-
- __forceinline void join(ThreadLocal2* alloc)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(s_thread_local_allocators_lock);
-#else
- Lock<SpinLock> lock(thread_local_allocators_lock);
-#endif
- thread_local_allocators.push_back(alloc);
- }
-
- public:
-
- struct CachedAllocator
- {
- __forceinline CachedAllocator(void* ptr)
- : alloc(nullptr), talloc0(nullptr), talloc1(nullptr)
- {
- assert(ptr == nullptr);
- }
-
- __forceinline CachedAllocator(FastAllocator* alloc, ThreadLocal2* talloc)
- : alloc(alloc), talloc0(&talloc->alloc0), talloc1(alloc->use_single_mode ? &talloc->alloc0 : &talloc->alloc1) {}
-
- __forceinline operator bool () const {
- return alloc != nullptr;
- }
-
- __forceinline void* operator() (size_t bytes, size_t align = 16) const {
- return talloc0->malloc(alloc,bytes,align);
- }
-
- __forceinline void* malloc0 (size_t bytes, size_t align = 16) const {
- return talloc0->malloc(alloc,bytes,align);
- }
-
- __forceinline void* malloc1 (size_t bytes, size_t align = 16) const {
- return talloc1->malloc(alloc,bytes,align);
- }
-
- public:
- FastAllocator* alloc;
- ThreadLocal* talloc0;
- ThreadLocal* talloc1;
- };
-
- __forceinline CachedAllocator getCachedAllocator() {
- return CachedAllocator(this,threadLocal2());
- }
-
- /*! Builder interface to create thread local allocator */
- struct Create
- {
- public:
- __forceinline Create (FastAllocator* allocator) : allocator(allocator) {}
- __forceinline CachedAllocator operator() () const { return allocator->getCachedAllocator(); }
-
- private:
- FastAllocator* allocator;
- };
-
- void internal_fix_used_blocks()
- {
- /* move thread local blocks to global block list */
- for (size_t i = 0; i < MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- while (threadBlocks[i].load() != nullptr) {
- Block* nextUsedBlock = threadBlocks[i].load()->next;
- threadBlocks[i].load()->next = usedBlocks.load();
- usedBlocks = threadBlocks[i].load();
- threadBlocks[i] = nextUsedBlock;
- }
- threadBlocks[i] = nullptr;
- }
- }
-
- static const size_t threadLocalAllocOverhead = 20; //! 20 means 5% parallel allocation overhead through unfilled thread local blocks
-#if defined(__AVX512ER__) // KNL
- static const size_t mainAllocOverheadStatic = 15; //! 15 means 7.5% allocation overhead through unfilled main alloc blocks
-#else
- static const size_t mainAllocOverheadStatic = 20; //! 20 means 5% allocation overhead through unfilled main alloc blocks
-#endif
- static const size_t mainAllocOverheadDynamic = 8; //! 20 means 12.5% allocation overhead through unfilled main alloc blocks
-
- /* calculates a single threaded threshold for the builders such
- * that for small scenes the overhead of partly allocated blocks
- * per thread is low */
- size_t fixSingleThreadThreshold(size_t branchingFactor, size_t defaultThreshold, size_t numPrimitives, size_t bytesEstimated)
- {
- if (numPrimitives == 0 || bytesEstimated == 0)
- return defaultThreshold;
-
- /* calculate block size in bytes to fulfill threadLocalAllocOverhead constraint */
- const size_t single_mode_factor = use_single_mode ? 1 : 2;
- const size_t threadCount = TaskScheduler::threadCount();
- const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSize;
-
- /* if we do not have to limit number of threads use optimal thresdhold */
- if ( (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount)
- return defaultThreshold;
-
- /* otherwise limit number of threads by calculating proper single thread threshold */
- else {
- double bytesPerPrimitive = double(bytesEstimated)/double(numPrimitives);
- return size_t(ceil(branchingFactor*singleThreadBytes/bytesPerPrimitive));
- }
- }
-
- __forceinline size_t alignSize(size_t i) {
- return (i+127)/128*128;
- }
-
- /*! initializes the grow size */
- __forceinline void initGrowSizeAndNumSlots(size_t bytesEstimated, bool fast)
- {
- /* we do not need single thread local allocator mode */
- use_single_mode = false;
-
- /* calculate growSize such that at most mainAllocationOverhead gets wasted when a block stays unused */
- size_t mainAllocOverhead = fast ? mainAllocOverheadDynamic : mainAllocOverheadStatic;
- size_t blockSize = alignSize(bytesEstimated/mainAllocOverhead);
- growSize = maxGrowSize = clamp(blockSize,size_t(1024),maxAllocationSize);
-
- /* if we reached the maxAllocationSize for growSize, we can
- * increase the number of allocation slots by still guaranteeing
- * the mainAllocationOverhead */
- slotMask = 0x0;
-
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 2 && bytesEstimated > 2*mainAllocOverhead*growSize) slotMask = 0x1;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 4 && bytesEstimated > 4*mainAllocOverhead*growSize) slotMask = 0x3;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 8*mainAllocOverhead*growSize) slotMask = 0x7;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 16*mainAllocOverhead*growSize) { growSize *= 2; } /* if the overhead is tiny, double the growSize */
-
- /* set the thread local alloc block size */
- size_t defaultBlockSizeSwitch = PAGE_SIZE+maxAlignment;
-
- /* for sufficiently large scene we can increase the defaultBlockSize over the defaultBlockSizeSwitch size */
-#if 0 // we do not do this as a block size of 4160 if for some reason best for KNL
- const size_t threadCount = TaskScheduler::threadCount();
- const size_t single_mode_factor = use_single_mode ? 1 : 2;
- const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSizeSwitch;
- if (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount)
- defaultBlockSize = min(max(defaultBlockSizeSwitch,bytesEstimated/(single_mode_factor*threadLocalAllocOverhead*threadCount)),growSize);
-
- /* otherwise we grow the defaultBlockSize up to defaultBlockSizeSwitch */
- else
-#endif
- defaultBlockSize = clamp(blockSize,size_t(1024),defaultBlockSizeSwitch);
-
- if (bytesEstimated == 0) {
- maxGrowSize = maxAllocationSize; // special mode if builder cannot estimate tree size
- defaultBlockSize = defaultBlockSizeSwitch;
- }
- log2_grow_size_scale = 0;
-
- if (device->alloc_main_block_size != 0) growSize = device->alloc_main_block_size;
- if (device->alloc_num_main_slots >= 1 ) slotMask = 0x0;
- if (device->alloc_num_main_slots >= 2 ) slotMask = 0x1;
- if (device->alloc_num_main_slots >= 4 ) slotMask = 0x3;
- if (device->alloc_num_main_slots >= 8 ) slotMask = 0x7;
- if (device->alloc_thread_block_size != 0) defaultBlockSize = device->alloc_thread_block_size;
- if (device->alloc_single_thread_alloc != -1) use_single_mode = device->alloc_single_thread_alloc;
- }
-
- /*! initializes the allocator */
- void init(size_t bytesAllocate, size_t bytesReserve, size_t bytesEstimate)
- {
- internal_fix_used_blocks();
- /* distribute the allocation to multiple thread block slots */
- slotMask = MAX_THREAD_USED_BLOCK_SLOTS-1; // FIXME: remove
- if (usedBlocks.load() || freeBlocks.load()) { reset(); return; }
- if (bytesReserve == 0) bytesReserve = bytesAllocate;
- freeBlocks = Block::create(device,bytesAllocate,bytesReserve,nullptr,atype);
- estimatedSize = bytesEstimate;
- initGrowSizeAndNumSlots(bytesEstimate,true);
- }
-
- /*! initializes the allocator */
- void init_estimate(size_t bytesEstimate)
- {
- internal_fix_used_blocks();
- if (usedBlocks.load() || freeBlocks.load()) { reset(); return; }
- /* single allocator mode ? */
- estimatedSize = bytesEstimate;
- //initGrowSizeAndNumSlots(bytesEstimate,false);
- initGrowSizeAndNumSlots(bytesEstimate,false);
-
- }
-
- /*! frees state not required after build */
- __forceinline void cleanup()
- {
- internal_fix_used_blocks();
-
- /* unbind all thread local allocators */
- for (auto alloc : thread_local_allocators) alloc->unbind(this);
- thread_local_allocators.clear();
- }
-
- /*! resets the allocator, memory blocks get reused */
- void reset ()
- {
- internal_fix_used_blocks();
-
- bytesUsed.store(0);
- bytesFree.store(0);
- bytesWasted.store(0);
-
- /* reset all used blocks and move them to begin of free block list */
- while (usedBlocks.load() != nullptr) {
- usedBlocks.load()->reset_block();
- Block* nextUsedBlock = usedBlocks.load()->next;
- usedBlocks.load()->next = freeBlocks.load();
- freeBlocks = usedBlocks.load();
- usedBlocks = nextUsedBlock;
- }
-
- /* remove all shared blocks as they are re-added during build */
- freeBlocks.store(Block::remove_shared_blocks(freeBlocks.load()));
-
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- }
-
- /* unbind all thread local allocators */
- for (auto alloc : thread_local_allocators) alloc->unbind(this);
- thread_local_allocators.clear();
- }
-
- /*! frees all allocated memory */
- __forceinline void clear()
- {
- cleanup();
- bytesUsed.store(0);
- bytesFree.store(0);
- bytesWasted.store(0);
- if (usedBlocks.load() != nullptr) usedBlocks.load()->clear_list(device); usedBlocks = nullptr;
- if (freeBlocks.load() != nullptr) freeBlocks.load()->clear_list(device); freeBlocks = nullptr;
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- }
- primrefarray.clear();
- }
-
- __forceinline size_t incGrowSizeScale()
- {
- size_t scale = log2_grow_size_scale.fetch_add(1)+1;
- return size_t(1) << min(size_t(16),scale);
- }
-
- /*! thread safe allocation of memory */
- void* malloc(size_t& bytes, size_t align, bool partial)
- {
- assert(align <= maxAlignment);
-
- while (true)
- {
- /* allocate using current block */
- size_t threadID = TaskScheduler::threadID();
- size_t slot = threadID & slotMask;
- Block* myUsedBlocks = threadUsedBlocks[slot];
- if (myUsedBlocks) {
- void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
- if (ptr) return ptr;
- }
-
- /* throw error if allocation is too large */
- if (bytes > maxAllocationSize)
- throw_RTCError(RTC_ERROR_UNKNOWN,"allocation is too large");
-
- /* parallel block creation in case of no freeBlocks, avoids single global mutex */
- if (likely(freeBlocks.load() == nullptr))
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(slotMutex[slot]);
-#else
- Lock<SpinLock> lock(slotMutex[slot]);
-#endif
- if (myUsedBlocks == threadUsedBlocks[slot]) {
- const size_t alignedBytes = (bytes+(align-1)) & ~(align-1);
- const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes);
- assert(allocSize >= bytes);
- threadBlocks[slot] = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,threadBlocks[slot],atype); // FIXME: a large allocation might throw away a block here!
- // FIXME: a direct allocation should allocate inside the block here, and not in the next loop! a different thread could do some allocation and make the large allocation fail.
- }
- continue;
- }
-
- /* if this fails allocate new block */
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- if (myUsedBlocks == threadUsedBlocks[slot])
- {
- if (freeBlocks.load() != nullptr) {
- Block* nextFreeBlock = freeBlocks.load()->next;
- freeBlocks.load()->next = usedBlocks;
- __memory_barrier();
- usedBlocks = freeBlocks.load();
- threadUsedBlocks[slot] = freeBlocks.load();
- freeBlocks = nextFreeBlock;
- } else {
- const size_t allocSize = min(growSize*incGrowSizeScale(),maxGrowSize);
- usedBlocks = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,usedBlocks,atype); // FIXME: a large allocation should get delivered directly, like above!
- }
- }
- }
- }
- }
-
- /*! add new block */
- void addBlock(void* ptr, ssize_t bytes)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- const size_t sizeof_Header = offsetof(Block,data[0]);
- void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1));
- size_t ofs = (size_t) aptr - (size_t) ptr;
- bytes -= ofs;
- if (bytes < 4096) return; // ignore empty or very small blocks
- freeBlocks = new (aptr) Block(SHARED,bytes-sizeof_Header,bytes-sizeof_Header,freeBlocks,ofs);
- }
-
- /* special allocation only used from morton builder only a single time for each build */
- void* specialAlloc(size_t bytes)
- {
- assert(freeBlocks.load() != nullptr && freeBlocks.load()->getBlockAllocatedBytes() >= bytes);
- return freeBlocks.load()->ptr();
- }
-
- struct Statistics
- {
- Statistics ()
- : bytesUsed(0), bytesFree(0), bytesWasted(0) {}
-
- Statistics (size_t bytesUsed, size_t bytesFree, size_t bytesWasted)
- : bytesUsed(bytesUsed), bytesFree(bytesFree), bytesWasted(bytesWasted) {}
-
- Statistics (FastAllocator* alloc, AllocationType atype, bool huge_pages = false)
- : bytesUsed(0), bytesFree(0), bytesWasted(0)
- {
- Block* usedBlocks = alloc->usedBlocks.load();
- Block* freeBlocks = alloc->freeBlocks.load();
- if (usedBlocks) bytesUsed += usedBlocks->getUsedBytes(atype,huge_pages);
- if (freeBlocks) bytesFree += freeBlocks->getAllocatedBytes(atype,huge_pages);
- if (usedBlocks) bytesFree += usedBlocks->getFreeBytes(atype,huge_pages);
- if (freeBlocks) bytesWasted += freeBlocks->getWastedBytes(atype,huge_pages);
- if (usedBlocks) bytesWasted += usedBlocks->getWastedBytes(atype,huge_pages);
- }
-
- std::string str(size_t numPrimitives)
- {
- std::stringstream str;
- str.setf(std::ios::fixed, std::ios::floatfield);
- str << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, "
- << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, "
- << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, "
- << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesAllocatedTotal() << " MB, "
- << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesAllocatedTotal())/double(numPrimitives);
- return str.str();
- }
-
- friend Statistics operator+ ( const Statistics& a, const Statistics& b)
- {
- return Statistics(a.bytesUsed+b.bytesUsed,
- a.bytesFree+b.bytesFree,
- a.bytesWasted+b.bytesWasted);
- }
-
- size_t bytesAllocatedTotal() const {
- return bytesUsed + bytesFree + bytesWasted;
- }
-
- public:
- size_t bytesUsed;
- size_t bytesFree;
- size_t bytesWasted;
- };
-
- Statistics getStatistics(AllocationType atype, bool huge_pages = false) {
- return Statistics(this,atype,huge_pages);
- }
-
- size_t getUsedBytes() {
- return bytesUsed;
- }
-
- size_t getWastedBytes() {
- return bytesWasted;
- }
-
- struct AllStatistics
- {
- AllStatistics (FastAllocator* alloc)
-
- : bytesUsed(alloc->bytesUsed),
- bytesFree(alloc->bytesFree),
- bytesWasted(alloc->bytesWasted),
- stat_all(alloc,ANY_TYPE),
- stat_malloc(alloc,ALIGNED_MALLOC),
- stat_4K(alloc,EMBREE_OS_MALLOC,false),
- stat_2M(alloc,EMBREE_OS_MALLOC,true),
- stat_shared(alloc,SHARED) {}
-
- AllStatistics (size_t bytesUsed,
- size_t bytesFree,
- size_t bytesWasted,
- Statistics stat_all,
- Statistics stat_malloc,
- Statistics stat_4K,
- Statistics stat_2M,
- Statistics stat_shared)
-
- : bytesUsed(bytesUsed),
- bytesFree(bytesFree),
- bytesWasted(bytesWasted),
- stat_all(stat_all),
- stat_malloc(stat_malloc),
- stat_4K(stat_4K),
- stat_2M(stat_2M),
- stat_shared(stat_shared) {}
-
- friend AllStatistics operator+ (const AllStatistics& a, const AllStatistics& b)
- {
- return AllStatistics(a.bytesUsed+b.bytesUsed,
- a.bytesFree+b.bytesFree,
- a.bytesWasted+b.bytesWasted,
- a.stat_all + b.stat_all,
- a.stat_malloc + b.stat_malloc,
- a.stat_4K + b.stat_4K,
- a.stat_2M + b.stat_2M,
- a.stat_shared + b.stat_shared);
- }
-
- void print(size_t numPrimitives)
- {
- std::stringstream str0;
- str0.setf(std::ios::fixed, std::ios::floatfield);
- str0 << " alloc : "
- << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, "
- << " "
- << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesUsed)/double(numPrimitives);
- std::cout << str0.str() << std::endl;
-
- std::stringstream str1;
- str1.setf(std::ios::fixed, std::ios::floatfield);
- str1 << " alloc : "
- << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, "
- << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, "
- << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, "
- << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*(bytesUsed+bytesFree+bytesWasted) << " MB, "
- << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesUsed+bytesFree+bytesWasted)/double(numPrimitives);
- std::cout << str1.str() << std::endl;
-
- std::cout << " total : " << stat_all.str(numPrimitives) << std::endl;
- std::cout << " 4K : " << stat_4K.str(numPrimitives) << std::endl;
- std::cout << " 2M : " << stat_2M.str(numPrimitives) << std::endl;
- std::cout << " malloc: " << stat_malloc.str(numPrimitives) << std::endl;
- std::cout << " shared: " << stat_shared.str(numPrimitives) << std::endl;
- }
-
- private:
- size_t bytesUsed;
- size_t bytesFree;
- size_t bytesWasted;
- Statistics stat_all;
- Statistics stat_malloc;
- Statistics stat_4K;
- Statistics stat_2M;
- Statistics stat_shared;
- };
-
- void print_blocks()
- {
- std::cout << " estimatedSize = " << estimatedSize << ", slotMask = " << slotMask << ", use_single_mode = " << use_single_mode << ", maxGrowSize = " << maxGrowSize << ", defaultBlockSize = " << defaultBlockSize << std::endl;
-
- std::cout << " used blocks = ";
- if (usedBlocks.load() != nullptr) usedBlocks.load()->print_list();
- std::cout << "[END]" << std::endl;
-
- std::cout << " free blocks = ";
- if (freeBlocks.load() != nullptr) freeBlocks.load()->print_list();
- std::cout << "[END]" << std::endl;
- }
-
- private:
-
- struct Block
- {
- static Block* create(MemoryMonitorInterface* device, size_t bytesAllocate, size_t bytesReserve, Block* next, AllocationType atype)
- {
- /* We avoid using os_malloc for small blocks as this could
- * cause a risk of fragmenting the virtual address space and
- * reach the limit of vm.max_map_count = 65k under Linux. */
- if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize)
- atype = ALIGNED_MALLOC;
-
- /* we need to additionally allocate some header */
- const size_t sizeof_Header = offsetof(Block,data[0]);
- bytesAllocate = sizeof_Header+bytesAllocate;
- bytesReserve = sizeof_Header+bytesReserve;
-
- /* consume full 4k pages with using os_malloc */
- if (atype == EMBREE_OS_MALLOC) {
- bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1));
- bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1));
- }
-
- /* either use alignedMalloc or os_malloc */
- void *ptr = nullptr;
- if (atype == ALIGNED_MALLOC)
- {
- /* special handling for default block size */
- if (bytesAllocate == (2*PAGE_SIZE_2M))
- {
- const size_t alignment = maxAlignment;
- if (device) device->memoryMonitor(bytesAllocate+alignment,false);
- ptr = alignedMalloc(bytesAllocate,alignment);
-
- /* give hint to transparently convert these pages to 2MB pages */
- const size_t ptr_aligned_begin = ((size_t)ptr) & ~size_t(PAGE_SIZE_2M-1);
- os_advise((void*)(ptr_aligned_begin + 0),PAGE_SIZE_2M); // may fail if no memory mapped before block
- os_advise((void*)(ptr_aligned_begin + 1*PAGE_SIZE_2M),PAGE_SIZE_2M);
- os_advise((void*)(ptr_aligned_begin + 2*PAGE_SIZE_2M),PAGE_SIZE_2M); // may fail if no memory mapped after block
-
- return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
- }
- else
- {
- const size_t alignment = maxAlignment;
- if (device) device->memoryMonitor(bytesAllocate+alignment,false);
- ptr = alignedMalloc(bytesAllocate,alignment);
- return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
- }
- }
- else if (atype == EMBREE_OS_MALLOC)
- {
- if (device) device->memoryMonitor(bytesAllocate,false);
- bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages);
- return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
- }
- else
- assert(false);
-
- return NULL;
- }
-
- Block (AllocationType atype, size_t bytesAllocate, size_t bytesReserve, Block* next, size_t wasted, bool huge_pages = false)
- : cur(0), allocEnd(bytesAllocate), reserveEnd(bytesReserve), next(next), wasted(wasted), atype(atype), huge_pages(huge_pages)
- {
- assert((((size_t)&data[0]) & (maxAlignment-1)) == 0);
- }
-
- static Block* remove_shared_blocks(Block* head)
- {
- Block** prev_next = &head;
- for (Block* block = head; block; block = block->next) {
- if (block->atype == SHARED) *prev_next = block->next;
- else prev_next = &block->next;
- }
- return head;
- }
-
- void clear_list(MemoryMonitorInterface* device)
- {
- Block* block = this;
- while (block) {
- Block* next = block->next;
- block->clear_block(device);
- block = next;
- }
- }
-
- void clear_block (MemoryMonitorInterface* device)
- {
- const size_t sizeof_Header = offsetof(Block,data[0]);
- const ssize_t sizeof_Alloced = wasted+sizeof_Header+getBlockAllocatedBytes();
-
- if (atype == ALIGNED_MALLOC) {
- alignedFree(this);
- if (device) device->memoryMonitor(-sizeof_Alloced,true);
- }
-
- else if (atype == EMBREE_OS_MALLOC) {
- size_t sizeof_This = sizeof_Header+reserveEnd;
- os_free(this,sizeof_This,huge_pages);
- if (device) device->memoryMonitor(-sizeof_Alloced,true);
- }
-
- else /* if (atype == SHARED) */ {
- }
- }
-
- void* malloc(MemoryMonitorInterface* device, size_t& bytes_in, size_t align, bool partial)
- {
- size_t bytes = bytes_in;
- assert(align <= maxAlignment);
- bytes = (bytes+(align-1)) & ~(align-1);
- if (unlikely(cur+bytes > reserveEnd && !partial)) return nullptr;
- const size_t i = cur.fetch_add(bytes);
- if (unlikely(i+bytes > reserveEnd && !partial)) return nullptr;
- if (unlikely(i > reserveEnd)) return nullptr;
- bytes_in = bytes = min(bytes,reserveEnd-i);
-
- if (i+bytes > allocEnd) {
- if (device) device->memoryMonitor(i+bytes-max(i,allocEnd),true);
- }
- return &data[i];
- }
-
- void* ptr() {
- return &data[cur];
- }
-
- void reset_block ()
- {
- allocEnd = max(allocEnd,(size_t)cur);
- cur = 0;
- }
-
- size_t getBlockUsedBytes() const {
- return min(size_t(cur),reserveEnd);
- }
-
- size_t getBlockFreeBytes() const {
- return getBlockAllocatedBytes() - getBlockUsedBytes();
- }
-
- size_t getBlockAllocatedBytes() const {
- return min(max(allocEnd,size_t(cur)),reserveEnd);
- }
-
- size_t getBlockWastedBytes() const {
- const size_t sizeof_Header = offsetof(Block,data[0]);
- return sizeof_Header + wasted;
- }
-
- size_t getBlockReservedBytes() const {
- return reserveEnd;
- }
-
- bool hasType(AllocationType atype_i, bool huge_pages_i) const
- {
- if (atype_i == ANY_TYPE ) return true;
- else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
- else return atype_i == atype;
- }
-
- size_t getUsedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockUsedBytes();
- }
- return bytes;
- }
-
- size_t getFreeBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockFreeBytes();
- }
- return bytes;
- }
-
- size_t getWastedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockWastedBytes();
- }
- return bytes;
- }
-
- size_t getAllocatedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockAllocatedBytes();
- }
- return bytes;
- }
-
- void print_list ()
- {
- for (const Block* block = this; block; block = block->next)
- block->print_block();
- }
-
- void print_block() const
- {
- if (atype == ALIGNED_MALLOC) std::cout << "A";
- else if (atype == EMBREE_OS_MALLOC) std::cout << "O";
- else if (atype == SHARED) std::cout << "S";
- if (huge_pages) std::cout << "H";
- size_t bytesUsed = getBlockUsedBytes();
- size_t bytesFree = getBlockFreeBytes();
- size_t bytesWasted = getBlockWastedBytes();
- std::cout << "[" << bytesUsed << ", " << bytesFree << ", " << bytesWasted << "] ";
- }
-
- public:
- std::atomic<size_t> cur; //!< current location of the allocator
- std::atomic<size_t> allocEnd; //!< end of the allocated memory region
- std::atomic<size_t> reserveEnd; //!< end of the reserved memory region
- Block* next; //!< pointer to next block in list
- size_t wasted; //!< amount of memory wasted through block alignment
- AllocationType atype; //!< allocation mode of the block
- bool huge_pages; //!< whether the block uses huge pages
- char align[maxAlignment-5*sizeof(size_t)-sizeof(AllocationType)-sizeof(bool)]; //!< align data to maxAlignment
- char data[1]; //!< here starts memory to use for allocations
- };
-
- private:
- Device* device;
- SpinLock mutex;
- size_t slotMask;
- std::atomic<Block*> threadUsedBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
- std::atomic<Block*> usedBlocks;
- std::atomic<Block*> freeBlocks;
-
- std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
-#else
- SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
-#endif
-
- bool use_single_mode;
- size_t defaultBlockSize;
- size_t estimatedSize;
- size_t growSize;
- size_t maxGrowSize;
- std::atomic<size_t> log2_grow_size_scale; //!< log2 of scaling factor for grow size // FIXME: remove
- std::atomic<size_t> bytesUsed;
- std::atomic<size_t> bytesFree;
- std::atomic<size_t> bytesWasted;
- static __thread ThreadLocal2* thread_local_allocator2;
- static SpinLock s_thread_local_allocators_lock;
- static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex thread_local_allocators_lock;
-#else
- SpinLock thread_local_allocators_lock;
-#endif
- std::vector<ThreadLocal2*> thread_local_allocators;
- AllocationType atype;
- mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/buffer.h b/thirdparty/embree-aarch64/kernels/common/buffer.h
deleted file mode 100644
index 02d319c59d..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/buffer.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-
-namespace embree
-{
- /*! Implements an API data buffer object. This class may or may not own the data. */
- class Buffer : public RefCount
- {
- public:
- /*! Buffer construction */
- Buffer()
- : device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
-
- /*! Buffer construction */
- Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
- : device(device), numBytes(numBytes_in)
- {
- device->refInc();
-
- if (ptr_in)
- {
- shared = true;
- ptr = (char*)ptr_in;
- }
- else
- {
- shared = false;
- alloc();
- }
- }
-
- /*! Buffer destruction */
- ~Buffer() {
- free();
- device->refDec();
- }
-
- /*! this class is not copyable */
- private:
- Buffer(const Buffer& other) DELETED; // do not implement
- Buffer& operator =(const Buffer& other) DELETED; // do not implement
-
- public:
- /* inits and allocates the buffer */
- void create(Device* device_in, size_t numBytes_in)
- {
- init(device_in, numBytes_in);
- alloc();
- }
-
- /* inits the buffer */
- void init(Device* device_in, size_t numBytes_in)
- {
- free();
- device = device_in;
- ptr = nullptr;
- numBytes = numBytes_in;
- shared = false;
- }
-
- /*! sets shared buffer */
- void set(Device* device_in, void* ptr_in, size_t numBytes_in)
- {
- free();
- device = device_in;
- ptr = (char*)ptr_in;
- if (numBytes_in != (size_t)-1)
- numBytes = numBytes_in;
- shared = true;
- }
-
- /*! allocated buffer */
- void alloc()
- {
- if (device)
- device->memoryMonitor(this->bytes(), false);
- size_t b = (this->bytes()+15) & ssize_t(-16);
- ptr = (char*)alignedMalloc(b,16);
- }
-
- /*! frees the buffer */
- void free()
- {
- if (shared) return;
- alignedFree(ptr);
- if (device)
- device->memoryMonitor(-ssize_t(this->bytes()), true);
- ptr = nullptr;
- }
-
- /*! gets buffer pointer */
- void* data()
- {
- /* report error if buffer is not existing */
- if (!device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
-
- /* return buffer */
- return ptr;
- }
-
- /*! returns pointer to first element */
- __forceinline char* getPtr() const {
- return ptr;
- }
-
- /*! returns the number of bytes of the buffer */
- __forceinline size_t bytes() const {
- return numBytes;
- }
-
- /*! returns true of the buffer is not empty */
- __forceinline operator bool() const {
- return ptr;
- }
-
- public:
- Device* device; //!< device to report memory usage to
- char* ptr; //!< pointer to buffer data
- size_t numBytes; //!< number of bytes in the buffer
- bool shared; //!< set if memory is shared with application
- };
-
- /*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
- class RawBufferView
- {
- public:
- /*! Buffer construction */
- RawBufferView()
- : ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
-
- public:
- /*! sets the buffer view */
- void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
- {
- if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
-
- ptr_ofs = buffer_in->ptr + offset_in;
- stride = stride_in;
- num = num_in;
- format = format_in;
- modCounter++;
- modified = true;
- buffer = buffer_in;
- }
-
- /*! returns pointer to the first element */
- __forceinline char* getPtr() const {
- return ptr_ofs;
- }
-
- /*! returns pointer to the i'th element */
- __forceinline char* getPtr(size_t i) const
- {
- assert(i<num);
- return ptr_ofs + i*stride;
- }
-
- /*! returns the number of elements of the buffer */
- __forceinline size_t size() const {
- return num;
- }
-
- /*! returns the number of bytes of the buffer */
- __forceinline size_t bytes() const {
- return num*stride;
- }
-
- /*! returns the buffer stride */
- __forceinline unsigned getStride() const
- {
- assert(stride <= unsigned(inf));
- return unsigned(stride);
- }
-
- /*! return the buffer format */
- __forceinline RTCFormat getFormat() const {
- return format;
- }
-
- /*! mark buffer as modified or unmodified */
- __forceinline void setModified() {
- modCounter++;
- modified = true;
- }
-
- /*! mark buffer as modified or unmodified */
- __forceinline bool isModified(unsigned int otherModCounter) const {
- return modCounter > otherModCounter;
- }
-
- /*! mark buffer as modified or unmodified */
- __forceinline bool isLocalModified() const {
- return modified;
- }
-
- /*! clear local modified flag */
- __forceinline void clearLocalModified() {
- modified = false;
- }
-
- /*! returns true of the buffer is not empty */
- __forceinline operator bool() const {
- return ptr_ofs;
- }
-
- /*! checks padding to 16 byte check, fails hard */
- __forceinline void checkPadding16() const
- {
- if (ptr_ofs && num)
- volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable?
- }
-
- public:
- char* ptr_ofs; //!< base pointer plus offset
- size_t stride; //!< stride of the buffer in bytes
- size_t num; //!< number of elements in the buffer
- RTCFormat format; //!< format of the buffer
- unsigned int modCounter; //!< version ID of this buffer
- bool modified; //!< local modified data
- int userData; //!< special data
- Ref<Buffer> buffer; //!< reference to the parent buffer
- };
-
- /*! A typed contiguous range of a buffer. This class does not own the buffer content. */
- template<typename T>
- class BufferView : public RawBufferView
- {
- public:
- typedef T value_type;
-
- /*! access to the ith element of the buffer */
- __forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
- __forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
- };
-
- template<>
- class BufferView<Vec3fa> : public RawBufferView
- {
- public:
- typedef Vec3fa value_type;
-
- /*! access to the ith element of the buffer */
- __forceinline const Vec3fa operator [](size_t i) const
- {
- assert(i<num);
- return Vec3fa(vfloat4::loadu((float*)(ptr_ofs + i*stride)));
- }
-
- /*! writes the i'th element */
- __forceinline void store(size_t i, const Vec3fa& v)
- {
- assert(i<num);
- vfloat4::storeu((float*)(ptr_ofs + i*stride), (vfloat4)v);
- }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/builder.h b/thirdparty/embree-aarch64/kernels/common/builder.h
deleted file mode 100644
index d2a1cfe3ce..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/builder.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "accel.h"
-
-namespace embree
-{
-#define MODE_HIGH_QUALITY (1<<8)
-
- /*! virtual interface for all hierarchy builders */
- class Builder : public RefCount {
- public:
-
- static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
-
- /*! initiates the hierarchy builder */
- virtual void build() = 0;
-
- /*! notifies the builder about the deletion of some geometry */
- virtual void deleteGeometry(size_t geomID) {};
-
- /*! clears internal builder state */
- virtual void clear() = 0;
- };
-
- /*! virtual interface for progress monitor class */
- struct BuildProgressMonitor {
- virtual void operator() (size_t dn) const = 0;
- };
-
- /*! build the progress monitor interface from a closure */
- template<typename Closure>
- struct ProgressMonitorClosure : BuildProgressMonitor
- {
- public:
- ProgressMonitorClosure (const Closure& closure) : closure(closure) {}
- void operator() (size_t dn) const { closure(dn); }
- private:
- const Closure closure;
- };
- template<typename Closure> __forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure) {
- return ProgressMonitorClosure<Closure>(closure);
- }
-
- struct LineSegments;
- struct TriangleMesh;
- struct QuadMesh;
- struct UserGeometry;
-
- class Scene;
-
- typedef void (*createLineSegmentsAccelTy)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder);
- typedef void (*createTriangleMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
- typedef void (*createQuadMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
- typedef void (*createUserGeometryAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/context.h b/thirdparty/embree-aarch64/kernels/common/context.h
deleted file mode 100644
index d0185a74f2..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/context.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "rtcore.h"
-#include "point_query.h"
-
-namespace embree
-{
- class Scene;
-
- struct IntersectContext
- {
- public:
- __forceinline IntersectContext(Scene* scene, RTCIntersectContext* user_context)
- : scene(scene), user(user_context) {}
-
- __forceinline bool hasContextFilter() const {
- return user->filter != nullptr;
- }
-
- __forceinline bool isCoherent() const {
- return embree::isCoherent(user->flags);
- }
-
- __forceinline bool isIncoherent() const {
- return embree::isIncoherent(user->flags);
- }
-
- public:
- Scene* scene;
- RTCIntersectContext* user;
- };
-
- template<int M, typename Geometry>
- __forceinline Vec4vf<M> enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v)
- {
-#if RTC_MIN_WIDTH
- const vfloat<M> d = length(Vec3vf<M>(v) - ray_org);
- const vfloat<M> r = clamp(context->user->minWidthDistanceFactor*d, v.w, geom->maxRadiusScale*v.w);
- return Vec4vf<M>(v.x,v.y,v.z,r);
-#else
- return v;
-#endif
- }
-
- template<typename Geometry>
- __forceinline Vec3ff enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v)
- {
-#if RTC_MIN_WIDTH
- const float d = length(Vec3fa(v) - ray_org);
- const float r = clamp(context->user->minWidthDistanceFactor*d, v.w, geom->maxRadiusScale*v.w);
- return Vec3ff(v.x,v.y,v.z,r);
-#else
- return v;
-#endif
- }
-
- enum PointQueryType
- {
- POINT_QUERY_TYPE_UNDEFINED = 0,
- POINT_QUERY_TYPE_SPHERE = 1,
- POINT_QUERY_TYPE_AABB = 2,
- };
-
- typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
-
- struct PointQueryContext
- {
- public:
- __forceinline PointQueryContext(Scene* scene,
- PointQuery* query_ws,
- PointQueryType query_type,
- PointQueryFunction func,
- RTCPointQueryContext* userContext,
- float similarityScale,
- void* userPtr)
- : scene(scene)
- , query_ws(query_ws)
- , query_type(query_type)
- , func(func)
- , userContext(userContext)
- , similarityScale(similarityScale)
- , userPtr(userPtr)
- , primID(RTC_INVALID_GEOMETRY_ID)
- , geomID(RTC_INVALID_GEOMETRY_ID)
- , query_radius(query_ws->radius)
- {
- if (query_type == POINT_QUERY_TYPE_AABB) {
- assert(similarityScale == 0.f);
- updateAABB();
- }
- if (userContext->instStackSize == 0) {
- assert(similarityScale == 1.f);
- }
- }
-
- public:
- __forceinline void updateAABB()
- {
- if (likely(query_ws->radius == (float)inf || userContext->instStackSize == 0)) {
- query_radius = Vec3fa(query_ws->radius);
- return;
- }
-
- const AffineSpace3fa m = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
- BBox3fa bbox(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius));
- bbox = xfmBounds(m, bbox);
- query_radius = 0.5f * (bbox.upper - bbox.lower);
- }
-
-public:
- Scene* scene;
-
- PointQuery* query_ws; // the original world space point query
- PointQueryType query_type;
- PointQueryFunction func;
- RTCPointQueryContext* userContext;
- const float similarityScale;
-
- void* userPtr;
-
- unsigned int primID;
- unsigned int geomID;
-
- Vec3fa query_radius; // used if the query is converted to an AABB internally
- };
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/common/default.h b/thirdparty/embree-aarch64/kernels/common/default.h
deleted file mode 100644
index 709119163b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/default.h
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/sys/platform.h"
-#include "../../common/sys/sysinfo.h"
-#include "../../common/sys/thread.h"
-#include "../../common/sys/alloc.h"
-#include "../../common/sys/ref.h"
-#include "../../common/sys/intrinsics.h"
-#include "../../common/sys/atomic.h"
-#include "../../common/sys/mutex.h"
-#include "../../common/sys/vector.h"
-#include "../../common/sys/array.h"
-#include "../../common/sys/string.h"
-#include "../../common/sys/regression.h"
-#include "../../common/sys/vector.h"
-
-#include "../../common/math/math.h"
-#include "../../common/math/transcendental.h"
-#include "../../common/simd/simd.h"
-#include "../../common/math/vec2.h"
-#include "../../common/math/vec3.h"
-#include "../../common/math/vec4.h"
-#include "../../common/math/vec2fa.h"
-#include "../../common/math/vec3fa.h"
-#include "../../common/math/interval.h"
-#include "../../common/math/bbox.h"
-#include "../../common/math/obbox.h"
-#include "../../common/math/lbbox.h"
-#include "../../common/math/linearspace2.h"
-#include "../../common/math/linearspace3.h"
-#include "../../common/math/affinespace.h"
-#include "../../common/math/range.h"
-#include "../../common/lexers/tokenstream.h"
-
-#include "../../common/tasking/taskscheduler.h"
-
-#define COMMA ,
-
-#include "../config.h"
-#include "isa.h"
-#include "stat.h"
-#include "profile.h"
-#include "rtcore.h"
-#include "vector.h"
-#include "state.h"
-#include "instance_stack.h"
-
-#include <vector>
-#include <map>
-#include <algorithm>
-#include <functional>
-#include <utility>
-#include <sstream>
-
-#if !defined(_DEBUG) && defined(BUILD_IOS)
-#undef assert
-#define assert(_EXPR)
-#endif
-
-namespace embree
-{
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec2 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using Vec2vf = Vec2<vfloat<N>>;
- template<int N> using Vec2vd = Vec2<vdouble<N>>;
- template<int N> using Vec2vr = Vec2<vreal<N>>;
- template<int N> using Vec2vi = Vec2<vint<N>>;
- template<int N> using Vec2vl = Vec2<vllong<N>>;
- template<int N> using Vec2vb = Vec2<vbool<N>>;
- template<int N> using Vec2vbf = Vec2<vboolf<N>>;
- template<int N> using Vec2vbd = Vec2<vboold<N>>;
-
- typedef Vec2<vfloat4> Vec2vf4;
- typedef Vec2<vdouble4> Vec2vd4;
- typedef Vec2<vreal4> Vec2vr4;
- typedef Vec2<vint4> Vec2vi4;
- typedef Vec2<vllong4> Vec2vl4;
- typedef Vec2<vbool4> Vec2vb4;
- typedef Vec2<vboolf4> Vec2vbf4;
- typedef Vec2<vboold4> Vec2vbd4;
-
- typedef Vec2<vfloat8> Vec2vf8;
- typedef Vec2<vdouble8> Vec2vd8;
- typedef Vec2<vreal8> Vec2vr8;
- typedef Vec2<vint8> Vec2vi8;
- typedef Vec2<vllong8> Vec2vl8;
- typedef Vec2<vbool8> Vec2vb8;
- typedef Vec2<vboolf8> Vec2vbf8;
- typedef Vec2<vboold8> Vec2vbd8;
-
- typedef Vec2<vfloat16> Vec2vf16;
- typedef Vec2<vdouble16> Vec2vd16;
- typedef Vec2<vreal16> Vec2vr16;
- typedef Vec2<vint16> Vec2vi16;
- typedef Vec2<vllong16> Vec2vl16;
- typedef Vec2<vbool16> Vec2vb16;
- typedef Vec2<vboolf16> Vec2vbf16;
- typedef Vec2<vboold16> Vec2vbd16;
-
- typedef Vec2<vfloatx> Vec2vfx;
- typedef Vec2<vdoublex> Vec2vdx;
- typedef Vec2<vrealx> Vec2vrx;
- typedef Vec2<vintx> Vec2vix;
- typedef Vec2<vllongx> Vec2vlx;
- typedef Vec2<vboolx> Vec2vbx;
- typedef Vec2<vboolfx> Vec2vbfx;
- typedef Vec2<vbooldx> Vec2vbdx;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec3 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using Vec3vf = Vec3<vfloat<N>>;
- template<int N> using Vec3vd = Vec3<vdouble<N>>;
- template<int N> using Vec3vr = Vec3<vreal<N>>;
- template<int N> using Vec3vi = Vec3<vint<N>>;
- template<int N> using Vec3vl = Vec3<vllong<N>>;
- template<int N> using Vec3vb = Vec3<vbool<N>>;
- template<int N> using Vec3vbf = Vec3<vboolf<N>>;
- template<int N> using Vec3vbd = Vec3<vboold<N>>;
-
- typedef Vec3<vfloat4> Vec3vf4;
- typedef Vec3<vdouble4> Vec3vd4;
- typedef Vec3<vreal4> Vec3vr4;
- typedef Vec3<vint4> Vec3vi4;
- typedef Vec3<vllong4> Vec3vl4;
- typedef Vec3<vbool4> Vec3vb4;
- typedef Vec3<vboolf4> Vec3vbf4;
- typedef Vec3<vboold4> Vec3vbd4;
-
- typedef Vec3<vfloat8> Vec3vf8;
- typedef Vec3<vdouble8> Vec3vd8;
- typedef Vec3<vreal8> Vec3vr8;
- typedef Vec3<vint8> Vec3vi8;
- typedef Vec3<vllong8> Vec3vl8;
- typedef Vec3<vbool8> Vec3vb8;
- typedef Vec3<vboolf8> Vec3vbf8;
- typedef Vec3<vboold8> Vec3vbd8;
-
- typedef Vec3<vfloat16> Vec3vf16;
- typedef Vec3<vdouble16> Vec3vd16;
- typedef Vec3<vreal16> Vec3vr16;
- typedef Vec3<vint16> Vec3vi16;
- typedef Vec3<vllong16> Vec3vl16;
- typedef Vec3<vbool16> Vec3vb16;
- typedef Vec3<vboolf16> Vec3vbf16;
- typedef Vec3<vboold16> Vec3vbd16;
-
- typedef Vec3<vfloatx> Vec3vfx;
- typedef Vec3<vdoublex> Vec3vdx;
- typedef Vec3<vrealx> Vec3vrx;
- typedef Vec3<vintx> Vec3vix;
- typedef Vec3<vllongx> Vec3vlx;
- typedef Vec3<vboolx> Vec3vbx;
- typedef Vec3<vboolfx> Vec3vbfx;
- typedef Vec3<vbooldx> Vec3vbdx;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec4 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using Vec4vf = Vec4<vfloat<N>>;
- template<int N> using Vec4vd = Vec4<vdouble<N>>;
- template<int N> using Vec4vr = Vec4<vreal<N>>;
- template<int N> using Vec4vi = Vec4<vint<N>>;
- template<int N> using Vec4vl = Vec4<vllong<N>>;
- template<int N> using Vec4vb = Vec4<vbool<N>>;
- template<int N> using Vec4vbf = Vec4<vboolf<N>>;
- template<int N> using Vec4vbd = Vec4<vboold<N>>;
-
- typedef Vec4<vfloat4> Vec4vf4;
- typedef Vec4<vdouble4> Vec4vd4;
- typedef Vec4<vreal4> Vec4vr4;
- typedef Vec4<vint4> Vec4vi4;
- typedef Vec4<vllong4> Vec4vl4;
- typedef Vec4<vbool4> Vec4vb4;
- typedef Vec4<vboolf4> Vec4vbf4;
- typedef Vec4<vboold4> Vec4vbd4;
-
- typedef Vec4<vfloat8> Vec4vf8;
- typedef Vec4<vdouble8> Vec4vd8;
- typedef Vec4<vreal8> Vec4vr8;
- typedef Vec4<vint8> Vec4vi8;
- typedef Vec4<vllong8> Vec4vl8;
- typedef Vec4<vbool8> Vec4vb8;
- typedef Vec4<vboolf8> Vec4vbf8;
- typedef Vec4<vboold8> Vec4vbd8;
-
- typedef Vec4<vfloat16> Vec4vf16;
- typedef Vec4<vdouble16> Vec4vd16;
- typedef Vec4<vreal16> Vec4vr16;
- typedef Vec4<vint16> Vec4vi16;
- typedef Vec4<vllong16> Vec4vl16;
- typedef Vec4<vbool16> Vec4vb16;
- typedef Vec4<vboolf16> Vec4vbf16;
- typedef Vec4<vboold16> Vec4vbd16;
-
- typedef Vec4<vfloatx> Vec4vfx;
- typedef Vec4<vdoublex> Vec4vdx;
- typedef Vec4<vrealx> Vec4vrx;
- typedef Vec4<vintx> Vec4vix;
- typedef Vec4<vllongx> Vec4vlx;
- typedef Vec4<vboolx> Vec4vbx;
- typedef Vec4<vboolfx> Vec4vbfx;
- typedef Vec4<vbooldx> Vec4vbdx;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Other shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using BBox3vf = BBox<Vec3vf<N>>;
- typedef BBox<Vec3vf4> BBox3vf4;
- typedef BBox<Vec3vf8> BBox3vf8;
- typedef BBox<Vec3vf16> BBox3vf16;
-
- /* calculate time segment itime and fractional time ftime */
- __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
- {
- const float timeScaled = time * numTimeSegments;
- const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return int(itimef);
- }
-
- __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
- {
- const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
- const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return int(itimef);
- }
-
- template<int N>
- __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
- {
- const vfloat<N> timeScaled = time * numTimeSegments;
- const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return vint<N>(itimef);
- }
-
- template<int N>
- __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
- {
- const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
- const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return vint<N>(itimef);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
- {
- const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
- const float round_down = 1.0f-2.0f*float(ulp);
- const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f);
- const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
- return make_range(itime_lower, itime_upper);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
- {
- const float lower = (range.lower-time_range.lower)/time_range.size();
- const float upper = (range.upper-time_range.lower)/time_range.size();
- return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/device.cpp b/thirdparty/embree-aarch64/kernels/common/device.cpp
deleted file mode 100644
index 16ec11b892..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/device.cpp
+++ /dev/null
@@ -1,567 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "device.h"
-#include "../hash.h"
-#include "scene_triangle_mesh.h"
-#include "scene_user_geometry.h"
-#include "scene_instance.h"
-#include "scene_curves.h"
-#include "scene_subdiv_mesh.h"
-
-#include "../subdiv/tessellation_cache.h"
-
-#include "acceln.h"
-#include "geometry.h"
-
-#include "../geometry/cylinder.h"
-
-#include "../bvh/bvh4_factory.h"
-#include "../bvh/bvh8_factory.h"
-
-#include "../../common/tasking/taskscheduler.h"
-#include "../../common/sys/alloc.h"
-
-namespace embree
-{
- /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
- ssize_t Device::debug_int0 = 0;
- ssize_t Device::debug_int1 = 0;
- ssize_t Device::debug_int2 = 0;
- ssize_t Device::debug_int3 = 0;
-
- DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
-
- static MutexSys g_mutex;
- static std::map<Device*,size_t> g_cache_size_map;
- static std::map<Device*,size_t> g_num_threads_map;
-
- Device::Device (const char* cfg)
- {
- /* check that CPU supports lowest ISA */
- if (!hasISA(ISA)) {
- throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
- }
-
- /* set default frequency level for detected CPU */
- switch (getCPUModel()) {
- case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
- }
-
- /* initialize global state */
-#if defined(EMBREE_CONFIG)
- State::parseString(EMBREE_CONFIG);
-#endif
- State::parseString(cfg);
- if (!ignore_config_files && FileName::executableFolder() != FileName(""))
- State::parseFile(FileName::executableFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR)));
- if (!ignore_config_files && FileName::homeFolder() != FileName(""))
- State::parseFile(FileName::homeFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR)));
- State::verify();
-
- /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
- if (!checkISASupport()) {
- throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
- }
-
- /*! do some internal tests */
- assert(isa::Cylinder::verify());
-
- /*! enable huge page support if desired */
-#if defined(__WIN32__)
- if (State::enable_selockmemoryprivilege)
- State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
-#endif
- State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
-
- /*! set tessellation cache size */
- setCacheSize( State::tessellation_cache_size );
-
- /*! enable some floating point exceptions to catch bugs */
- if (State::float_exceptions)
- {
- int exceptions = _MM_MASK_MASK;
- //exceptions &= ~_MM_MASK_INVALID;
- exceptions &= ~_MM_MASK_DENORM;
- exceptions &= ~_MM_MASK_DIV_ZERO;
- //exceptions &= ~_MM_MASK_OVERFLOW;
- //exceptions &= ~_MM_MASK_UNDERFLOW;
- //exceptions &= ~_MM_MASK_INEXACT;
- _MM_SET_EXCEPTION_MASK(exceptions);
- }
-
- /* print info header */
- if (State::verbosity(1))
- print();
- if (State::verbosity(2))
- State::print();
-
- /* register all algorithms */
- bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
-
-#if defined(EMBREE_TARGET_SIMD8)
- bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
-#endif
-
- /* setup tasking system */
- initTaskingSystem(numThreads);
-
- /* ray stream SOA to AOS conversion */
-#if defined(EMBREE_RAY_PACKETS)
- RayStreamFilterFuncsType rayStreamFilterFuncs;
- SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(enabled_cpu_features,rayStreamFilterFuncs);
- rayStreamFilters = rayStreamFilterFuncs();
-#endif
- }
-
- Device::~Device ()
- {
- setCacheSize(0);
- exitTaskingSystem();
- }
-
- std::string getEnabledTargets()
- {
- std::string v;
-#if defined(EMBREE_TARGET_SSE2)
- v += "SSE2 ";
-#endif
-#if defined(EMBREE_TARGET_SSE42)
- v += "SSE4.2 ";
-#endif
-#if defined(EMBREE_TARGET_AVX)
- v += "AVX ";
-#endif
-#if defined(EMBREE_TARGET_AVX2)
- v += "AVX2 ";
-#endif
-#if defined(EMBREE_TARGET_AVX512KNL)
- v += "AVX512KNL ";
-#endif
-#if defined(EMBREE_TARGET_AVX512SKX)
- v += "AVX512SKX ";
-#endif
- return v;
- }
-
- std::string getEmbreeFeatures()
- {
- std::string v;
-#if defined(EMBREE_RAY_MASK)
- v += "raymasks ";
-#endif
-#if defined (EMBREE_BACKFACE_CULLING)
- v += "backfaceculling ";
-#endif
-#if defined (EMBREE_BACKFACE_CULLING_CURVES)
- v += "backfacecullingcurves ";
-#endif
-#if defined(EMBREE_FILTER_FUNCTION)
- v += "intersection_filter ";
-#endif
-#if defined (EMBREE_COMPACT_POLYS)
- v += "compact_polys ";
-#endif
- return v;
- }
-
- void Device::print()
- {
- const int cpu_features = getCPUFeatures();
- std::cout << std::endl;
- std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
- std::cout << " Compiler : " << getCompilerName() << std::endl;
- std::cout << " Build : ";
-#if defined(DEBUG)
- std::cout << "Debug " << std::endl;
-#else
- std::cout << "Release " << std::endl;
-#endif
- std::cout << " Platform : " << getPlatformName() << std::endl;
- std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
- std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
- std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
- std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
- const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
- const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
- std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
- std::cout << " Config" << std::endl;
- std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
- std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
- std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
- std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
- std::cout << " Features: " << getEmbreeFeatures() << std::endl;
- std::cout << " Tasking : ";
-#if defined(TASKING_TBB)
- std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
- #if TBB_INTERFACE_VERSION >= 12002
- std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
- #else
- std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
- #endif
-#endif
-#if defined(TASKING_INTERNAL)
- std::cout << "internal_tasking_system ";
-#endif
-#if defined(TASKING_GCD) && defined(BUILD_IOS)
- std::cout << "GCD tasking system ";
-#endif
-#if defined(TASKING_PPL)
- std::cout << "PPL ";
-#endif
- std::cout << std::endl;
-
- /* check of FTZ and DAZ flags are set in CSR */
- if (!hasFTZ || !hasDAZ)
- {
-#if !defined(_DEBUG)
- if (State::verbosity(1))
-#endif
- {
- std::cout << std::endl;
- std::cout << "================================================================================" << std::endl;
- std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
- << " in the MXCSR control and status register. This can have a severe " << std::endl
- << " performance impact. Please enable these modes for each application " << std::endl
- << " thread the following way:" << std::endl
- << std::endl
- << " #include \"xmmintrin.h\"" << std::endl
- << " #include \"pmmintrin.h\"" << std::endl
- << std::endl
- << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
- << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
- std::cout << "================================================================================" << std::endl;
- std::cout << std::endl;
- }
- }
- std::cout << std::endl;
- }
-
- void Device::setDeviceErrorCode(RTCError error)
- {
- RTCError* stored_error = errorHandler.error();
- if (*stored_error == RTC_ERROR_NONE)
- *stored_error = error;
- }
-
- RTCError Device::getDeviceErrorCode()
- {
- RTCError* stored_error = errorHandler.error();
- RTCError error = *stored_error;
- *stored_error = RTC_ERROR_NONE;
- return error;
- }
-
- void Device::setThreadErrorCode(RTCError error)
- {
- RTCError* stored_error = g_errorHandler.error();
- if (*stored_error == RTC_ERROR_NONE)
- *stored_error = error;
- }
-
- RTCError Device::getThreadErrorCode()
- {
- RTCError* stored_error = g_errorHandler.error();
- RTCError error = *stored_error;
- *stored_error = RTC_ERROR_NONE;
- return error;
- }
-
- void Device::process_error(Device* device, RTCError error, const char* str)
- {
- /* store global error code when device construction failed */
- if (!device)
- return setThreadErrorCode(error);
-
- /* print error when in verbose mode */
- if (device->verbosity(1))
- {
- switch (error) {
- case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break;
- case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break;
- case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
- case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
- case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
- case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
- default : std::cerr << "Embree: Invalid error code"; break;
- };
- if (str) std::cerr << ", (" << str << ")";
- std::cerr << std::endl;
- }
-
- /* call user specified error callback */
- if (device->error_function)
- device->error_function(device->error_function_userptr,error,str);
-
- /* record error code */
- device->setDeviceErrorCode(error);
- }
-
- void Device::memoryMonitor(ssize_t bytes, bool post)
- {
- if (State::memory_monitor_function && bytes != 0) {
- if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {
- if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
- throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
- }
- }
- }
- }
-
- size_t getMaxNumThreads()
- {
- size_t maxNumThreads = 0;
- for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
- maxNumThreads = max(maxNumThreads, (*i).second);
- if (maxNumThreads == 0)
- maxNumThreads = std::numeric_limits<size_t>::max();
- return maxNumThreads;
- }
-
- size_t getMaxCacheSize()
- {
- size_t maxCacheSize = 0;
- for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
- maxCacheSize = max(maxCacheSize, (*i).second);
- return maxCacheSize;
- }
-
- void Device::setCacheSize(size_t bytes)
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- Lock<MutexSys> lock(g_mutex);
- if (bytes == 0) g_cache_size_map.erase(this);
- else g_cache_size_map[this] = bytes;
-
- size_t maxCacheSize = getMaxCacheSize();
- resizeTessellationCache(maxCacheSize);
-#endif
- }
-
- void Device::initTaskingSystem(size_t numThreads)
- {
- Lock<MutexSys> lock(g_mutex);
- if (numThreads == 0)
- g_num_threads_map[this] = std::numeric_limits<size_t>::max();
- else
- g_num_threads_map[this] = numThreads;
-
- /* create task scheduler */
- size_t maxNumThreads = getMaxNumThreads();
- TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
-#if USE_TASK_ARENA
- const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
- const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
- arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
-#endif
- }
-
- void Device::exitTaskingSystem()
- {
- Lock<MutexSys> lock(g_mutex);
- g_num_threads_map.erase(this);
-
- /* terminate tasking system */
- if (g_num_threads_map.size() == 0) {
- TaskScheduler::destroy();
- }
- /* or configure new number of threads */
- else {
- size_t maxNumThreads = getMaxNumThreads();
- TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
- }
-#if USE_TASK_ARENA
- arena.reset();
-#endif
- }
-
- void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
- {
- /* hidden internal properties */
- switch ((size_t)prop)
- {
- case 1000000: debug_int0 = val; return;
- case 1000001: debug_int1 = val; return;
- case 1000002: debug_int2 = val; return;
- case 1000003: debug_int3 = val; return;
- }
-
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
- }
-
- ssize_t Device::getProperty(const RTCDeviceProperty prop)
- {
- size_t iprop = (size_t)prop;
-
- /* get name of internal regression test */
- if (iprop >= 2000000 && iprop < 3000000)
- {
- RegressionTest* test = getRegressionTest(iprop-2000000);
- if (test) return (ssize_t) test->name.c_str();
- else return 0;
- }
-
- /* run internal regression test */
- if (iprop >= 3000000 && iprop < 4000000)
- {
- RegressionTest* test = getRegressionTest(iprop-3000000);
- if (test) return test->run();
- else return 0;
- }
-
- /* documented properties */
- switch (prop)
- {
- case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
- case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
- case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
- case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
-
-#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512KNL) | hasISA(AVX512SKX);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_RAY_MASK)
- case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_BACKFACE_CULLING)
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_BACKFACE_CULLING_CURVES)
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_COMPACT_POLYS)
- case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
-#endif
-
-#if defined(TASKING_INTERNAL)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
-#endif
-
-#if defined(TASKING_TBB)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
-#endif
-
-#if defined(TASKING_PPL)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
-#endif
-
-#if defined(TASKING_GCD) && defined(BUILD_IOS)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 3;
-#endif
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_CURVE)
- case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_POINT)
- case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(TASKING_PPL)
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
-#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
-#else
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
-#endif
-
-#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
- case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
-#endif
-
- default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/device.h b/thirdparty/embree-aarch64/kernels/common/device.h
deleted file mode 100644
index e9a81bb109..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/device.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "state.h"
-#include "accel.h"
-
-namespace embree
-{
- class BVH4Factory;
- class BVH8Factory;
-
- class Device : public State, public MemoryMonitorInterface
- {
- ALIGNED_CLASS_(16);
-
- public:
-
- /*! Device construction */
- Device (const char* cfg);
-
- /*! Device destruction */
- virtual ~Device ();
-
- /*! prints info about the device */
- void print();
-
- /*! sets the error code */
- void setDeviceErrorCode(RTCError error);
-
- /*! returns and clears the error code */
- RTCError getDeviceErrorCode();
-
- /*! sets the error code */
- static void setThreadErrorCode(RTCError error);
-
- /*! returns and clears the error code */
- static RTCError getThreadErrorCode();
-
- /*! processes error codes, do not call directly */
- static void process_error(Device* device, RTCError error, const char* str);
-
- /*! invokes the memory monitor callback */
- void memoryMonitor(ssize_t bytes, bool post);
-
- /*! sets the size of the software cache. */
- void setCacheSize(size_t bytes);
-
- /*! sets a property */
- void setProperty(const RTCDeviceProperty prop, ssize_t val);
-
- /*! gets a property */
- ssize_t getProperty(const RTCDeviceProperty prop);
-
- private:
-
- /*! initializes the tasking system */
- void initTaskingSystem(size_t numThreads);
-
- /*! shuts down the tasking system */
- void exitTaskingSystem();
-
- /*! some variables that can be set via rtcSetParameter1i for debugging purposes */
- public:
- static ssize_t debug_int0;
- static ssize_t debug_int1;
- static ssize_t debug_int2;
- static ssize_t debug_int3;
-
- public:
- std::unique_ptr<BVH4Factory> bvh4_factory;
-#if defined(EMBREE_TARGET_SIMD8)
- std::unique_ptr<BVH8Factory> bvh8_factory;
-#endif
-
-#if USE_TASK_ARENA
- std::unique_ptr<tbb::task_arena> arena;
-#endif
-
- /* ray streams filter */
- RayStreamFilterFuncs rayStreamFilters;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.cpp b/thirdparty/embree-aarch64/kernels/common/geometry.cpp
deleted file mode 100644
index b3aa8e3396..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/geometry.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "geometry.h"
-#include "scene.h"
-
-namespace embree
-{
- const char* Geometry::gtype_names[Geometry::GTY_END] =
- {
- "flat_linear_curve",
- "round_linear_curve",
- "oriented_linear_curve",
- "",
- "flat_bezier_curve",
- "round_bezier_curve",
- "oriented_bezier_curve",
- "",
- "flat_bspline_curve",
- "round_bspline_curve",
- "oriented_bspline_curve",
- "",
- "flat_hermite_curve",
- "round_hermite_curve",
- "oriented_hermite_curve",
- "",
- "flat_catmull_rom_curve",
- "round_catmull_rom_curve",
- "oriented_catmull_rom_curve",
- "",
- "triangles",
- "quads",
- "grid",
- "subdivs",
- "",
- "sphere",
- "disc",
- "oriented_disc",
- "",
- "usergeom",
- "instance_cheap",
- "instance_expensive",
- };
-
- Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps)
- : device(device), userPtr(nullptr),
- numPrimitives(numPrimitives), numTimeSteps(unsigned(numTimeSteps)), fnumTimeSegments(float(numTimeSteps-1)), time_range(0.0f,1.0f),
- mask(-1),
- gtype(gtype),
- gsubtype(GTY_SUBTYPE_DEFAULT),
- quality(RTC_BUILD_QUALITY_MEDIUM),
- state((unsigned)State::MODIFIED),
- enabled(true),
- intersectionFilterN(nullptr), occlusionFilterN(nullptr), pointQueryFunc(nullptr)
- {
- device->refInc();
- }
-
- Geometry::~Geometry()
- {
- device->refDec();
- }
-
- void Geometry::setNumPrimitives(unsigned int numPrimitives_in)
- {
- if (numPrimitives_in == numPrimitives) return;
-
- numPrimitives = numPrimitives_in;
-
- Geometry::update();
- }
-
- void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in)
- {
- if (numTimeSteps_in == numTimeSteps) {
- return;
- }
-
- numTimeSteps = numTimeSteps_in;
- fnumTimeSegments = float(numTimeSteps_in-1);
-
- Geometry::update();
- }
-
- void Geometry::setTimeRange (const BBox1f range)
- {
- time_range = range;
- Geometry::update();
- }
-
- void Geometry::update()
- {
- ++modCounter_; // FIXME: required?
- state = (unsigned)State::MODIFIED;
- }
-
- void Geometry::commit()
- {
- ++modCounter_;
- state = (unsigned)State::COMMITTED;
- }
-
- void Geometry::preCommit()
- {
- if (State::MODIFIED == (State)state)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed");
- }
-
- void Geometry::postCommit()
- {
- }
-
- void Geometry::enable ()
- {
- if (isEnabled())
- return;
-
- enabled = true;
- ++modCounter_;
- }
-
- void Geometry::disable ()
- {
- if (isDisabled())
- return;
-
- enabled = false;
- ++modCounter_;
- }
-
- void Geometry::setUserData (void* ptr)
- {
- userPtr = ptr;
- }
-
- void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter)
- {
- if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
-
- intersectionFilterN = filter;
- }
-
- void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter)
- {
- if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
-
- occlusionFilterN = filter;
- }
-
- void Geometry::setPointQueryFunction (RTCPointQueryFunction func)
- {
- pointQueryFunc = func;
- }
-
- void Geometry::interpolateN(const RTCInterpolateNArguments* const args)
- {
- const void* valid_i = args->valid;
- const unsigned* primIDs = args->primIDs;
- const float* u = args->u;
- const float* v = args->v;
- unsigned int N = args->N;
- RTCBufferType bufferType = args->bufferType;
- unsigned int bufferSlot = args->bufferSlot;
- float* P = args->P;
- float* dPdu = args->dPdu;
- float* dPdv = args->dPdv;
- float* ddPdudu = args->ddPdudu;
- float* ddPdvdv = args->ddPdvdv;
- float* ddPdudv = args->ddPdudv;
- unsigned int valueCount = args->valueCount;
-
- if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex");
- const int* valid = (const int*) valid_i;
-
- __aligned(64) float P_tmp[256];
- __aligned(64) float dPdu_tmp[256];
- __aligned(64) float dPdv_tmp[256];
- __aligned(64) float ddPdudu_tmp[256];
- __aligned(64) float ddPdvdv_tmp[256];
- __aligned(64) float ddPdudv_tmp[256];
-
- float* Pt = P ? P_tmp : nullptr;
- float* dPdut = nullptr, *dPdvt = nullptr;
- if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; }
- float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr;
- if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; }
-
- for (unsigned int i=0; i<N; i++)
- {
- if (valid && !valid[i]) continue;
-
- RTCInterpolateArguments iargs;
- iargs.primID = primIDs[i];
- iargs.u = u[i];
- iargs.v = v[i];
- iargs.bufferType = bufferType;
- iargs.bufferSlot = bufferSlot;
- iargs.P = Pt;
- iargs.dPdu = dPdut;
- iargs.dPdv = dPdvt;
- iargs.ddPdudu = ddPdudut;
- iargs.ddPdvdv = ddPdvdvt;
- iargs.ddPdudv = ddPdudvt;
- iargs.valueCount = valueCount;
- interpolate(&iargs);
-
- if (likely(P)) {
- for (unsigned int j=0; j<valueCount; j++)
- P[j*N+i] = Pt[j];
- }
- if (likely(dPdu))
- {
- for (unsigned int j=0; j<valueCount; j++) {
- dPdu[j*N+i] = dPdut[j];
- dPdv[j*N+i] = dPdvt[j];
- }
- }
- if (likely(ddPdudu))
- {
- for (unsigned int j=0; j<valueCount; j++) {
- ddPdudu[j*N+i] = ddPdudut[j];
- ddPdvdv[j*N+i] = ddPdvdvt[j];
- ddPdudv[j*N+i] = ddPdudvt[j];
- }
- }
- }
- }
-
- bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context)
- {
- assert(context->primID < size());
-
- RTCPointQueryFunctionArguments args;
- args.query = (RTCPointQuery*)context->query_ws;
- args.userPtr = context->userPtr;
- args.primID = context->primID;
- args.geomID = context->geomID;
- args.context = context->userContext;
- args.similarityScale = context->similarityScale;
-
- bool update = false;
- if(context->func) update |= context->func(&args);
- if(pointQueryFunc) update |= pointQueryFunc(&args);
-
- if (update && context->userContext->instStackSize > 0)
- {
- // update point query
- if (context->query_type == POINT_QUERY_TYPE_AABB) {
- context->updateAABB();
- } else {
- assert(context->similarityScale > 0.f);
- query->radius = context->query_ws->radius * context->similarityScale;
- }
- }
- return update;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.h b/thirdparty/embree-aarch64/kernels/common/geometry.h
deleted file mode 100644
index 953974bfd2..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/geometry.h
+++ /dev/null
@@ -1,582 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "buffer.h"
-#include "../common/point_query.h"
-#include "../builders/priminfo.h"
-
-namespace embree
-{
- class Scene;
- class Geometry;
-
- struct GeometryCounts
- {
- __forceinline GeometryCounts()
- : numFilterFunctions(0),
- numTriangles(0), numMBTriangles(0),
- numQuads(0), numMBQuads(0),
- numBezierCurves(0), numMBBezierCurves(0),
- numLineSegments(0), numMBLineSegments(0),
- numSubdivPatches(0), numMBSubdivPatches(0),
- numUserGeometries(0), numMBUserGeometries(0),
- numInstancesCheap(0), numMBInstancesCheap(0),
- numInstancesExpensive(0), numMBInstancesExpensive(0),
- numGrids(0), numMBGrids(0),
- numPoints(0), numMBPoints(0) {}
-
- __forceinline size_t size() const {
- return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numGrids + numPoints
- + numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBGrids + numMBPoints;
- }
-
- __forceinline unsigned int enabledGeometryTypesMask() const
- {
- unsigned int mask = 0;
- if (numTriangles) mask |= 1 << 0;
- if (numQuads) mask |= 1 << 1;
- if (numBezierCurves+numLineSegments) mask |= 1 << 2;
- if (numSubdivPatches) mask |= 1 << 3;
- if (numUserGeometries) mask |= 1 << 4;
- if (numInstancesCheap) mask |= 1 << 5;
- if (numInstancesExpensive) mask |= 1 << 6;
- if (numGrids) mask |= 1 << 7;
- if (numPoints) mask |= 1 << 8;
-
- unsigned int maskMB = 0;
- if (numMBTriangles) maskMB |= 1 << 0;
- if (numMBQuads) maskMB |= 1 << 1;
- if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2;
- if (numMBSubdivPatches) maskMB |= 1 << 3;
- if (numMBUserGeometries) maskMB |= 1 << 4;
- if (numMBInstancesCheap) maskMB |= 1 << 5;
- if (numMBInstancesExpensive) maskMB |= 1 << 6;
- if (numMBGrids) maskMB |= 1 << 7;
- if (numMBPoints) maskMB |= 1 << 8;
-
- return (mask<<8) + maskMB;
- }
-
- __forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const
- {
- GeometryCounts ret;
- ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions;
- ret.numTriangles = numTriangles + rhs.numTriangles;
- ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles;
- ret.numQuads = numQuads + rhs.numQuads;
- ret.numMBQuads = numMBQuads + rhs.numMBQuads;
- ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves;
- ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves;
- ret.numLineSegments = numLineSegments + rhs.numLineSegments;
- ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments;
- ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches;
- ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches;
- ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries;
- ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries;
- ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap;
- ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap;
- ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive;
- ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive;
- ret.numGrids = numGrids + rhs.numGrids;
- ret.numMBGrids = numMBGrids + rhs.numMBGrids;
- ret.numPoints = numPoints + rhs.numPoints;
- ret.numMBPoints = numMBPoints + rhs.numMBPoints;
-
- return ret;
- }
-
- size_t numFilterFunctions; //!< number of geometries with filter functions enabled
- size_t numTriangles; //!< number of enabled triangles
- size_t numMBTriangles; //!< number of enabled motion blured triangles
- size_t numQuads; //!< number of enabled quads
- size_t numMBQuads; //!< number of enabled motion blurred quads
- size_t numBezierCurves; //!< number of enabled curves
- size_t numMBBezierCurves; //!< number of enabled motion blurred curves
- size_t numLineSegments; //!< number of enabled line segments
- size_t numMBLineSegments; //!< number of enabled line motion blurred segments
- size_t numSubdivPatches; //!< number of enabled subdivision patches
- size_t numMBSubdivPatches; //!< number of enabled motion blured subdivision patches
- size_t numUserGeometries; //!< number of enabled user geometries
- size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
- size_t numInstancesCheap; //!< number of enabled cheap instances
- size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances
- size_t numInstancesExpensive; //!< number of enabled expensive instances
- size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances
- size_t numGrids; //!< number of enabled grid geometries
- size_t numMBGrids; //!< number of enabled motion blurred grid geometries
- size_t numPoints; //!< number of enabled points
- size_t numMBPoints; //!< number of enabled motion blurred points
- };
-
- /*! Base class all geometries are derived from */
- class Geometry : public RefCount
- {
- friend class Scene;
- public:
-
- /*! type of geometry */
- enum GType
- {
- GTY_FLAT_LINEAR_CURVE = 0,
- GTY_ROUND_LINEAR_CURVE = 1,
- GTY_ORIENTED_LINEAR_CURVE = 2,
- GTY_CONE_LINEAR_CURVE = 3,
-
- GTY_FLAT_BEZIER_CURVE = 4,
- GTY_ROUND_BEZIER_CURVE = 5,
- GTY_ORIENTED_BEZIER_CURVE = 6,
-
- GTY_FLAT_BSPLINE_CURVE = 8,
- GTY_ROUND_BSPLINE_CURVE = 9,
- GTY_ORIENTED_BSPLINE_CURVE = 10,
-
- GTY_FLAT_HERMITE_CURVE = 12,
- GTY_ROUND_HERMITE_CURVE = 13,
- GTY_ORIENTED_HERMITE_CURVE = 14,
-
- GTY_FLAT_CATMULL_ROM_CURVE = 16,
- GTY_ROUND_CATMULL_ROM_CURVE = 17,
- GTY_ORIENTED_CATMULL_ROM_CURVE = 18,
-
- GTY_TRIANGLE_MESH = 20,
- GTY_QUAD_MESH = 21,
- GTY_GRID_MESH = 22,
- GTY_SUBDIV_MESH = 23,
-
- GTY_SPHERE_POINT = 25,
- GTY_DISC_POINT = 26,
- GTY_ORIENTED_DISC_POINT = 27,
-
- GTY_USER_GEOMETRY = 29,
- GTY_INSTANCE_CHEAP = 30,
- GTY_INSTANCE_EXPENSIVE = 31,
- GTY_END = 32,
-
- GTY_BASIS_LINEAR = 0,
- GTY_BASIS_BEZIER = 4,
- GTY_BASIS_BSPLINE = 8,
- GTY_BASIS_HERMITE = 12,
- GTY_BASIS_CATMULL_ROM = 16,
- GTY_BASIS_MASK = 28,
-
- GTY_SUBTYPE_FLAT_CURVE = 0,
- GTY_SUBTYPE_ROUND_CURVE = 1,
- GTY_SUBTYPE_ORIENTED_CURVE = 2,
- GTY_SUBTYPE_MASK = 3,
- };
-
- enum GSubType
- {
- GTY_SUBTYPE_DEFAULT= 0,
- GTY_SUBTYPE_INSTANCE_LINEAR = 0,
- GTY_SUBTYPE_INSTANCE_QUATERNION = 1
- };
-
- enum GTypeMask
- {
- MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE,
- MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE,
- MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE,
- MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE,
-
- MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE,
- MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE,
- MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE,
-
- MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE,
- MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE,
- MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE,
-
- MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE,
- MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE,
- MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE,
-
- MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE,
- MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE,
- MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE,
-
- MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE,
-
- MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE |
- MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE |
- MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE |
- MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE,
-
- MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT,
- MTY_DISC_POINT = 1ul << GTY_DISC_POINT,
- MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT,
-
- MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT,
-
- MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS,
-
- MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH,
- MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH,
- MTY_GRID_MESH = 1ul << GTY_GRID_MESH,
- MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH,
- MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY,
-
- MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP,
- MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE,
- MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE
- };
-
- static const char* gtype_names[GTY_END];
-
- enum class State : unsigned {
- MODIFIED = 0,
- COMMITTED = 1,
- };
-
- public:
-
- /*! Geometry constructor */
- Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps);
-
- /*! Geometry destructor */
- virtual ~Geometry();
-
- public:
-
- /*! tests if geometry is enabled */
- __forceinline bool isEnabled() const { return enabled; }
-
- /*! tests if geometry is disabled */
- __forceinline bool isDisabled() const { return !isEnabled(); }
-
- /*! tests if that geometry has some filter function set */
- __forceinline bool hasFilterFunctions () const {
- return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr);
- }
-
- /*! returns geometry type */
- __forceinline GType getType() const { return gtype; }
-
- /*! returns curve type */
- __forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); }
-
- /*! returns curve basis */
- __forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); }
-
- /*! returns geometry type mask */
- __forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); }
-
- /*! returns number of primitives */
- __forceinline size_t size() const { return numPrimitives; }
-
- /*! sets the number of primitives */
- virtual void setNumPrimitives(unsigned int numPrimitives_in);
-
- /*! sets number of time steps */
- virtual void setNumTimeSteps (unsigned int numTimeSteps_in);
-
- /*! sets motion blur time range */
- void setTimeRange (const BBox1f range);
-
- /*! sets number of vertex attributes */
- virtual void setVertexAttributeCount (unsigned int N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! sets number of topologies */
- virtual void setTopologyCount (unsigned int N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! sets the build quality */
- void setBuildQuality(RTCBuildQuality quality_in)
- {
- this->quality = quality_in;
- Geometry::update();
- }
-
- /* calculate time segment itime and fractional time ftime */
- __forceinline int timeSegment(float time, float& ftime) const {
- return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime);
- }
-
- template<int N>
- __forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const {
- return getTimeSegment(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> timeSegmentRange(const BBox1f& range) const {
- return getTimeSegmentRange(range,time_range,fnumTimeSegments);
- }
-
- /* returns time that corresponds to time step */
- __forceinline float timeStep(const int i) const {
- assert(i>=0 && i<(int)numTimeSteps);
- return time_range.lower + time_range.size()*float(i)/fnumTimeSegments;
- }
-
- /*! for all geometries */
- public:
-
- /*! Enable geometry. */
- virtual void enable();
-
- /*! Update geometry. */
- void update();
-
- /*! commit of geometry */
- virtual void commit();
-
- /*! Update geometry buffer. */
- virtual void updateBuffer(RTCBufferType type, unsigned int slot) {
- update(); // update everything for geometries not supporting this call
- }
-
- /*! Disable geometry. */
- virtual void disable();
-
- /*! Verify the geometry */
- virtual bool verify() { return true; }
-
- /*! called before every build */
- virtual void preCommit();
-
- /*! called after every build */
- virtual void postCommit();
-
- virtual void addElementsToCount (GeometryCounts & counts) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- };
-
- /*! sets constant tessellation rate for the geometry */
- virtual void setTessellationRate(float N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets the maximal curve radius scale allowed by min-width feature. */
- virtual void setMaxRadiusScale(float s) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set user data pointer. */
- virtual void setUserData(void* ptr);
-
- /*! Get user data pointer. */
- __forceinline void* getUserData() const {
- return userPtr;
- }
-
- /*! interpolates user data to the specified u/v location */
- virtual void interpolate(const RTCInterpolateArguments* const args) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! interpolates user data to the specified u/v locations */
- virtual void interpolateN(const RTCInterpolateNArguments* const args);
-
- /* point query api */
- bool pointQuery(PointQuery* query, PointQueryContext* context);
-
- /*! for subdivision surfaces only */
- public:
- virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set displacement function. */
- virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getFirstHalfEdge(unsigned int faceID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getFace(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getNextHalfEdge(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! get fast access to first vertex buffer if applicable */
- virtual float * getCompactVertexArray () const {
- return nullptr;
- }
-
- /*! Returns the modified counter - how many times the geo has been modified */
- __forceinline unsigned int getModCounter () const {
- return modCounter_;
- }
-
- /*! for triangle meshes and bezier curves only */
- public:
-
-
- /*! Sets ray mask. */
- virtual void setMask(unsigned mask) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets specified buffer. */
- virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Gets specified buffer. */
- virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set intersection filter function for ray packets of size N. */
- virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN);
-
- /*! Set occlusion filter function for ray packets of size N. */
- virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN);
-
- /*! for instances only */
- public:
-
- /*! Sets the instanced scene */
- virtual void setInstancedScene(const Ref<Scene>& scene) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets transformation of the instance */
- virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets transformation of the instance */
- virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Returns the transformation of the instance */
- virtual AffineSpace3fa getTransform(float time) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! for user geometries only */
- public:
-
- /*! Set bounds function. */
- virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set intersect function for ray packets of size N. */
- virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set occlusion function for ray packets of size N. */
- virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set point query function. */
- void setPointQueryFunction(RTCPointQueryFunction func);
-
- /*! returns number of time segments */
- __forceinline unsigned numTimeSegments () const {
- return numTimeSteps-1;
- }
-
- public:
-
- virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry");
- }
-
- virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
- }
-
- virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
- }
-
- virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
- }
-
- virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
- }
-
- virtual Vec3fa computeDirection(unsigned int primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
- }
-
- virtual Vec3fa computeDirection(unsigned int primID, size_t time) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- public:
- __forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; }
- __forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; }
-
- public:
- Device* device; //!< device this geometry belongs to
-
- void* userPtr; //!< user pointer
- unsigned int numPrimitives; //!< number of primitives of this geometry
-
- unsigned int numTimeSteps; //!< number of time steps
- float fnumTimeSegments; //!< number of time segments (precalculation)
- BBox1f time_range; //!< motion blur time range
-
- unsigned int mask; //!< for masking out geometry
- unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified
-
- struct {
- GType gtype : 8; //!< geometry type
- GSubType gsubtype : 8; //!< geometry subtype
- RTCBuildQuality quality : 3; //!< build quality for geometry
- unsigned state : 2;
- bool enabled : 1; //!< true if geometry is enabled
- };
-
- RTCFilterFunctionN intersectionFilterN;
- RTCFilterFunctionN occlusionFilterN;
- RTCPointQueryFunction pointQueryFunc;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/hit.h b/thirdparty/embree-aarch64/kernels/common/hit.h
deleted file mode 100644
index 32a198cdfe..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/hit.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "ray.h"
-#include "instance_stack.h"
-
-namespace embree
-{
- /* Hit structure for K hits */
- template<int K>
- struct HitK
- {
- /* Default construction does nothing */
- __forceinline HitK() {}
-
- /* Constructs a hit */
- __forceinline HitK(const RTCIntersectContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng)
- : Ng(Ng), u(u), v(v), primID(primID), geomID(geomID)
- {
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = RTC_INVALID_GEOMETRY_ID;
- instance_id_stack::copy(context->instID, instID);
- }
-
- /* Returns the size of the hit */
- static __forceinline size_t size() { return K; }
-
- public:
- Vec3vf<K> Ng; // geometry normal
- vfloat<K> u; // barycentric u coordinate of hit
- vfloat<K> v; // barycentric v coordinate of hit
- vuint<K> primID; // primitive ID
- vuint<K> geomID; // geometry ID
- vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Specialization for a single hit */
- template<>
- struct __aligned(16) HitK<1>
- {
- /* Default construction does nothing */
- __forceinline HitK() {}
-
- /* Constructs a hit */
- __forceinline HitK(const RTCIntersectContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng)
- : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID)
- {
- instance_id_stack::copy(context->instID, instID);
- }
-
- /* Returns the size of the hit */
- static __forceinline size_t size() { return 1; }
-
- public:
- Vec3<float> Ng; // geometry normal
- float u; // barycentric u coordinate of hit
- float v; // barycentric v coordinate of hit
- unsigned int primID; // primitive ID
- unsigned int geomID; // geometry ID
- unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Shortcuts */
- typedef HitK<1> Hit;
- typedef HitK<4> Hit4;
- typedef HitK<8> Hit8;
- typedef HitK<16> Hit16;
-
- /* Outputs hit to stream */
- template<int K>
- __forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray)
- {
- cout << "{ " << embree_endl
- << " Ng = " << ray.Ng << embree_endl
- << " u = " << ray.u << embree_endl
- << " v = " << ray.v << embree_endl
- << " primID = " << ray.primID << embree_endl
- << " geomID = " << ray.geomID << embree_endl
- << " instID =";
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- {
- cout << " " << ray.instID[l];
- }
- cout << embree_endl;
- return cout << "}";
- }
-
- template<typename Hit>
- __forceinline void copyHitToRay(RayHit& ray, const Hit& hit)
- {
- ray.Ng = hit.Ng;
- ray.u = hit.u;
- ray.v = hit.v;
- ray.primID = hit.primID;
- ray.geomID = hit.geomID;
- instance_id_stack::copy(hit.instID, ray.instID);
- }
-
- template<int K>
- __forceinline void copyHitToRay(const vbool<K> &mask, RayHitK<K> &ray, const HitK<K> &hit)
- {
- vfloat<K>::storeu(mask,&ray.Ng.x, hit.Ng.x);
- vfloat<K>::storeu(mask,&ray.Ng.y, hit.Ng.y);
- vfloat<K>::storeu(mask,&ray.Ng.z, hit.Ng.z);
- vfloat<K>::storeu(mask,&ray.u, hit.u);
- vfloat<K>::storeu(mask,&ray.v, hit.v);
- vuint<K>::storeu(mask,&ray.primID, hit.primID);
- vuint<K>::storeu(mask,&ray.geomID, hit.geomID);
- instance_id_stack::copy(hit.instID, ray.instID, mask);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/instance_stack.h b/thirdparty/embree-aarch64/kernels/common/instance_stack.h
deleted file mode 100644
index d7e3637f7b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/instance_stack.h
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "rtcore.h"
-
-namespace embree {
-namespace instance_id_stack {
-
-static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0,
- "RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0.");
-
-/*******************************************************************************
- * Instance ID stack manipulation.
- * This is used from the instance intersector.
- ******************************************************************************/
-
-/*
- * Push an instance to the stack.
- */
-RTC_FORCEINLINE bool push(RTCIntersectContext* context,
- unsigned instanceId)
-{
-#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
- const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
- /* We assert here because instances are silently dropped when the stack is full.
- This might be quite hard to find in production. */
- assert(spaceAvailable);
- if (likely(spaceAvailable))
- context->instID[context->instStackSize++] = instanceId;
- return spaceAvailable;
-#else
- const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID);
- assert(spaceAvailable);
- if (likely(spaceAvailable))
- context->instID[0] = instanceId;
- return spaceAvailable;
-#endif
-}
-
-
-/*
- * Pop the last instance pushed to the stack.
- * Do not call on an empty stack.
- */
-RTC_FORCEINLINE void pop(RTCIntersectContext* context)
-{
- assert(context);
-#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
- assert(context->instStackSize > 0);
- context->instID[--context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
-#else
- assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
- context->instID[0] = RTC_INVALID_GEOMETRY_ID;
-#endif
-}
-
-/*******************************************************************************
- * Optimized instance id stack copy.
- * The copy() function at the bottom of this block will either copy full
- * stacks or copy only until the last valid element has been copied, depending
- * on RTC_MAX_INSTANCE_LEVEL_COUNT.
- ******************************************************************************/
-
-/*
- * Plain array assignment. This works for scalar->scalar,
- * scalar->vector, and vector->vector.
- */
-template <class Src, class Tgt>
-RTC_FORCEINLINE void level_copy(unsigned level, Src* src, Tgt* tgt)
-{
- tgt[level] = src[level];
-}
-
-/*
- * Masked SIMD vector->vector store.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask)
-{
- vuint<K>::storeu(mask, tgt + level, src[level]);
-}
-
-/*
- * Masked scalar->SIMD vector store.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const vbool<K>& mask)
-{
- vuint<K>::store(mask, tgt + level, src[level]);
-}
-
-/*
- * Indexed assign from vector to scalar.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, unsigned* tgt, const size_t& idx)
-{
- tgt[level] = src[level][idx];
-}
-
-/*
- * Indexed assign from scalar to vector.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const size_t& idx)
-{
- tgt[level][idx] = src[level];
-}
-
-/*
- * Indexed assign from vector to vector.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const size_t& i, const size_t& j)
-{
- tgt[level][j] = src[level][i];
-}
-
-/*
- * Check if the given stack level is valid.
- * These are only used for large max stack sizes.
- */
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const size_t& /*i*/)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const vbool<K>& /*mask*/)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack)
-{
- return any(stack[level] != RTC_INVALID_GEOMETRY_ID);
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const vbool<K>& mask)
-{
- return any(mask & (stack[level] != RTC_INVALID_GEOMETRY_ID));
-}
-
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i)
-{
- return stack[level][i] != RTC_INVALID_GEOMETRY_ID;
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i, const size_t& /*j*/)
-{
- return stack[level][i] != RTC_INVALID_GEOMETRY_ID;
-}
-
-/*
- * Copy an instance ID stack.
- *
- * This function automatically selects a LevelFunctor from the above Assign
- * structs.
- */
-template <class Src, class Tgt, class... Args>
-RTC_FORCEINLINE void copy(Src src, Tgt tgt, Args&&... args)
-{
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
- /*
- * Avoid all loops for only one level.
- */
- level_copy(0, src, tgt, std::forward<Args>(args)...);
-
-#elif (RTC_MAX_INSTANCE_LEVEL_COUNT <= 4)
- /*
- * It is faster to avoid the valid test for low level counts.
- * Just copy the whole stack.
- */
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- level_copy(l, src, tgt, std::forward<Args>(args)...);
-
-#else
- /*
- * For general stack sizes, it pays off to test for validity.
- */
- bool valid = true;
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT && valid; ++l)
- {
- level_copy(l, src, tgt, std::forward<Args>(args)...);
- valid = level_valid(l, src, std::forward<Args>(args)...);
- }
-#endif
-}
-
-} // namespace instance_id_stack
-} // namespace embree
-
diff --git a/thirdparty/embree-aarch64/kernels/common/isa.h b/thirdparty/embree-aarch64/kernels/common/isa.h
deleted file mode 100644
index 63fb8d3351..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/isa.h
+++ /dev/null
@@ -1,271 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/sys/platform.h"
-#include "../../common/sys/sysinfo.h"
-
-namespace embree
-{
-#define DEFINE_SYMBOL2(type,name) \
- typedef type (*name##Func)(); \
- name##Func name;
-
-#define DECLARE_SYMBOL2(type,name) \
- namespace sse2 { extern type name(); } \
- namespace sse42 { extern type name(); } \
- namespace avx { extern type name(); } \
- namespace avx2 { extern type name(); } \
- namespace avx512knl { extern type name(); } \
- namespace avx512skx { extern type name(); } \
- void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \
- type name##_error() { return type(name##_error2); } \
- type name##_zero() { return type(nullptr); }
-
-#define DECLARE_ISA_FUNCTION(type,symbol,args) \
- namespace sse2 { extern type symbol(args); } \
- namespace sse42 { extern type symbol(args); } \
- namespace avx { extern type symbol(args); } \
- namespace avx2 { extern type symbol(args); } \
- namespace avx512knl { extern type symbol(args); } \
- namespace avx512skx { extern type symbol(args); } \
- inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \
- typedef type (*symbol##Ty)(args); \
-
-#define DEFINE_ISA_FUNCTION(type,symbol,args) \
- typedef type (*symbol##Func)(args); \
- symbol##Func symbol;
-
-#define ZERO_SYMBOL(features,intersector) \
- intersector = intersector##_zero;
-
-#define INIT_SYMBOL(features,intersector) \
- intersector = decltype(intersector)(intersector##_error);
-
-#define SELECT_SYMBOL_DEFAULT(features,intersector) \
- intersector = isa::intersector;
-
-#if defined(__SSE__) || defined(__ARM_NEON)
-#if !defined(EMBREE_TARGET_SIMD4)
-#define EMBREE_TARGET_SIMD4
-#endif
-#endif
-
-#if defined(EMBREE_TARGET_SSE42)
-#define SELECT_SYMBOL_SSE42(features,intersector) \
- if ((features & SSE42) == SSE42) intersector = sse42::intersector;
-#else
-#define SELECT_SYMBOL_SSE42(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX) || defined(__AVX__)
-#if !defined(EMBREE_TARGET_SIMD8)
-#define EMBREE_TARGET_SIMD8
-#endif
-#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target
-#define SELECT_SYMBOL_AVX(features,intersector) \
- if ((features & ISA) == ISA) intersector = isa::intersector;
-#else
-#define SELECT_SYMBOL_AVX(features,intersector) \
- if ((features & AVX) == AVX) intersector = avx::intersector;
-#endif
-#else
-#define SELECT_SYMBOL_AVX(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX2)
-#if !defined(EMBREE_TARGET_SIMD8)
-#define EMBREE_TARGET_SIMD8
-#endif
-#define SELECT_SYMBOL_AVX2(features,intersector) \
- if ((features & AVX2) == AVX2) intersector = avx2::intersector;
-#else
-#define SELECT_SYMBOL_AVX2(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX512KNL)
-#if !defined(EMBREE_TARGET_SIMD16)
-#define EMBREE_TARGET_SIMD16
-#endif
-#define SELECT_SYMBOL_AVX512KNL(features,intersector) \
- if ((features & AVX512KNL) == AVX512KNL) intersector = avx512knl::intersector;
-#else
-#define SELECT_SYMBOL_AVX512KNL(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX512SKX)
-#if !defined(EMBREE_TARGET_SIMD16)
-#define EMBREE_TARGET_SIMD16
-#endif
-#define SELECT_SYMBOL_AVX512SKX(features,intersector) \
- if ((features & AVX512SKX) == AVX512SKX) intersector = avx512skx::intersector;
-#else
-#define SELECT_SYMBOL_AVX512SKX(features,intersector)
-#endif
-
-#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- ZERO_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
- struct VerifyMultiTargetLinking {
- static __noinline int getISA(int depth = 5) {
- if (depth == 0) return ISA;
- else return getISA(depth-1);
- }
- };
- namespace sse2 { int getISA(); };
- namespace sse42 { int getISA(); };
- namespace avx { int getISA(); };
- namespace avx2 { int getISA(); };
- namespace avx512knl { int getISA(); };
- namespace avx512skx { int getISA(); };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h b/thirdparty/embree-aarch64/kernels/common/motion_derivative.h
deleted file mode 100644
index 82953f0e89..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h
+++ /dev/null
@@ -1,325 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/math/affinespace.h"
-#include "../../common/math/interval.h"
-
-#include <functional>
-
-namespace embree {
-
-#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f
-
-static void motion_derivative_coefficients(const float *p, float *coeff);
-
-struct MotionDerivativeCoefficients
-{
- float theta;
- float coeffs[3*8*7];
-
- MotionDerivativeCoefficients() {}
-
- // xfm0 and xfm1 are interpret as quaternion decomposition
- MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1)
- {
- // cosTheta of the two quaternions
- const float cosTheta = min(1.f, max(-1.f,
- xfm0.l.vx.w * xfm1.l.vx.w
- + xfm0.l.vy.w * xfm1.l.vy.w
- + xfm0.l.vz.w * xfm1.l.vz.w
- + xfm0.p.w * xfm1.p.w));
-
- theta = std::acos(cosTheta);
- Vec4f qperp(xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w);
- if (cosTheta < 0.995f) {
- // compute perpendicular quaternion
- qperp.x = xfm1.p.w - cosTheta * xfm0.p.w;
- qperp.y = xfm1.l.vx.w - cosTheta * xfm0.l.vx.w;
- qperp.z = xfm1.l.vy.w - cosTheta * xfm0.l.vy.w;
- qperp.w = xfm1.l.vz.w - cosTheta * xfm0.l.vz.w;
- qperp = normalize(qperp);
- }
- const float p[33] = {
- theta,
- xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z, // translation component of xfm0
- xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z, // translation component of xfm1
- xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w, // quaternion of xfm0
- qperp.x, qperp.y, qperp.z, qperp.w,
- xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x, // scale/skew component of xfm0
- xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y,
- xfm0.l.vz.z, xfm0.p.z,
- xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x, // scale/skew component of xfm1
- xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y,
- xfm1.l.vz.z, xfm1.p.z
- };
- motion_derivative_coefficients(p, coeffs);
- }
-};
-
-struct MotionDerivative
-{
- float twoTheta;
- float c[8];
-
- MotionDerivative(MotionDerivativeCoefficients const& mdc,
- int dim, Vec3fa const& p0, Vec3fa const& p1)
- : twoTheta(2.f*mdc.theta)
- {
- const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z };
- for (int i = 0; i < 8; ++i) {
- c[i] = 0;
- for (int j = 0; j < 7; ++j) {
- c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j];
- }
- }
- }
-
- template<typename T>
- struct EvalMotionDerivative
- {
- MotionDerivative const& md;
- float offset;
-
- EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {}
-
- T operator()(T const& time) const {
- return md.c[0] + md.c[1] * time
- + (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time)
- + (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time)
- + offset;
- }
- };
-
- unsigned int findRoots(
- Interval1f const& interval,
- float offset,
- float* roots,
- unsigned int maxNumRoots)
- {
- unsigned int numRoots = 0;
- EvalMotionDerivative<Interval1f> eval(*this, offset);
- findRoots(eval, interval, numRoots, roots, maxNumRoots);
- return numRoots;
- }
-
- template<typename Eval>
- static void findRoots(
-
- Eval const& eval,
- Interval1f const& interval,
- unsigned int& numRoots,
- float* roots,
- unsigned int maxNumRoots)
- {
- Interval1f range = eval(interval);
- if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return;
-
- const float split = 0.5f * (interval.upper + interval.lower);
- if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f)
- {
- // check if the root already exists
- for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) {
- if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON)
- return;
- }
- if (numRoots < maxNumRoots) {
- roots[numRoots++] = split;
- }
- if (numRoots > maxNumRoots) {
- printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS
- return;
- }
- return;
- }
-
- findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots);
- findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots);
- }
-};
-
-/******************************************************************************
- * Code generated with sympy 1.4 *
- * See http://www.sympy.org/ for more information. *
- * *
- * see *
- * *
- * scripts/generate_motion_derivative_coefficients.py *
- * *
- * for how this code is generated *
- * *
- ******************************************************************************/
-static void motion_derivative_coefficients(const float *p, float *coeff)
-{
- coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27];
- coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24];
- coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25];
- coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26];
- coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15];
- coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16];
- coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17];
- coeff[7] = 0;
- coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24];
- coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25];
- coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26];
- coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24];
- coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25];
- coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26];
- coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27];
- coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24];
- coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25];
- coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26];
- coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15];
- coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16];
- coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17];
- coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0];
- coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24];
- coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25];
- coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26];
- coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24];
- coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25];
- coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26];
- coeff[28] = 0;
- coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0];
- coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0];
- coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0];
- coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0];
- coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0];
- coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0];
- coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27];
- coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24];
- coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25];
- coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26];
- coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15];
- coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16];
- coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17];
- coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0];
- coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24];
- coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25];
- coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26];
- coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24];
- coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25];
- coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26];
- coeff[49] = 0;
- coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0];
- coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0];
- coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0];
- coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0];
- coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0];
- coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0];
- coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30];
- coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24];
- coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28];
- coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29];
- coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15];
- coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19];
- coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20];
- coeff[63] = 0;
- coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
- coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28];
- coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29];
- coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
- coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28];
- coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29];
- coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30];
- coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24];
- coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28];
- coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29];
- coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15];
- coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19];
- coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20];
- coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0];
- coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
- coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28];
- coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29];
- coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
- coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28];
- coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29];
- coeff[84] = 0;
- coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0];
- coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0];
- coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0];
- coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0];
- coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0];
- coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0];
- coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30];
- coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24];
- coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28];
- coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29];
- coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15];
- coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19];
- coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20];
- coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0];
- coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24];
- coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28];
- coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29];
- coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24];
- coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28];
- coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29];
- coeff[105] = 0;
- coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0];
- coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0];
- coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0];
- coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0];
- coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0];
- coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0];
- coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32];
- coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24];
- coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28];
- coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31];
- coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15];
- coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19];
- coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22];
- coeff[119] = 0;
- coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
- coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
- coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31];
- coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
- coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
- coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31];
- coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30];
- coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24];
- coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28];
- coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29];
- coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15];
- coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19];
- coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20];
- coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0];
- coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
- coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
- coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29];
- coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
- coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
- coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29];
- coeff[140] = 0;
- coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0];
- coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0];
- coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0];
- coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0];
- coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0];
- coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0];
- coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32];
- coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24];
- coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28];
- coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31];
- coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15];
- coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19];
- coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22];
- coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0];
- coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24];
- coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28];
- coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31];
- coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24];
- coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28];
- coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31];
- coeff[161] = 0;
- coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0];
- coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0];
- coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0];
- coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0];
- coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0];
- coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0];
-}
-
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/common/point_query.h b/thirdparty/embree-aarch64/kernels/common/point_query.h
deleted file mode 100644
index 27d158ca3a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/point_query.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /* Point query structure for closest point query */
- template<int K>
- struct RTC_ALIGN(16) PointQueryK
- {
- /* Default construction does nothing */
- __forceinline PointQueryK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero)
- : p(p), time(time), radius(radius) {}
-
- /* Returns the size of the ray */
- static __forceinline size_t size() { return K; }
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline vbool<K> valid() const
- {
- const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vn = radius >= vfloat<K>(0);
- const vbool<K> vf = abs(time) < vfloat<K>(inf);
- return vx & vy & vz & vn & vf;
- }
-
- __forceinline void get(PointQueryK<1>* ray) const;
- __forceinline void get(size_t i, PointQueryK<1>& ray) const;
- __forceinline void set(const PointQueryK<1>* ray);
- __forceinline void set(size_t i, const PointQueryK<1>& ray);
-
- Vec3vf<K> p; // location of the query point
- vfloat<K> time; // time for motion blur
- vfloat<K> radius; // radius for the point query
- };
-
- /* Specialization for a single point query */
- template<>
- struct RTC_ALIGN(16) PointQueryK<1>
- {
- /* Default construction does nothing */
- __forceinline PointQueryK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero)
- : p(p), time(time), radius(radius) {}
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline bool valid() const {
- return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf);
- }
-
- Vec3f p;
- float time;
- float radius;
- };
-
- /* Converts point query packet to single point query */
- template<int K>
- __forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const
- {
- for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
- {
- query[i].p.x = p.x[i];
- query[i].p.y = p.y[i];
- query[i].p.z = p.z[i];
- query[i].time = time[i];
- query[i].radius = radius[i];
- }
- }
-
- /* Extracts a single point query out of a point query packet*/
- template<int K>
- __forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const
- {
- query.p.x = p.x[i];
- query.p.y = p.y[i];
- query.p.z = p.z[i];
- query.radius = radius[i];
- query.time = time[i];
- }
-
- /* Converts single point query to point query packet */
- template<int K>
- __forceinline void PointQueryK<K>::set(const PointQueryK<1>* query)
- {
- for (size_t i = 0; i < K; i++)
- {
- p.x[i] = query[i].p.x;
- p.y[i] = query[i].p.y;
- p.z[i] = query[i].p.z;
- radius[i] = query[i].radius;
- time[i] = query[i].time;
- }
- }
-
- /* inserts a single point query into a point query packet element */
- template<int K>
- __forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query)
- {
- p.x[i] = query.p.x;
- p.y[i] = query.p.y;
- p.z[i] = query.p.z;
- radius[i] = query.radius;
- time[i] = query.time;
- }
-
- /* Shortcuts */
- typedef PointQueryK<1> PointQuery;
- typedef PointQueryK<4> PointQuery4;
- typedef PointQueryK<8> PointQuery8;
- typedef PointQueryK<16> PointQuery16;
- struct PointQueryN;
-
- /* Outputs point query to stream */
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query)
- {
- cout << "{ " << embree_endl
- << " p = " << query.p << embree_endl
- << " r = " << query.radius << embree_endl
- << " time = " << query.time << embree_endl
- << "}";
- return cout;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/primref.h b/thirdparty/embree-aarch64/kernels/common/primref.h
deleted file mode 100644
index ce75c982bb..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/primref.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! A primitive reference stores the bounds of the primitive and its ID. */
- struct __aligned(32) PrimRef
- {
- __forceinline PrimRef () {}
-
-#if defined(__AVX__)
- __forceinline PrimRef(const PrimRef& v) {
- vfloat8::store((float*)this,vfloat8::load((float*)&v));
- }
- __forceinline PrimRef& operator=(const PrimRef& v) {
- vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this;
- }
-#endif
-
- __forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID)
- {
- lower = Vec3fx(bounds.lower, geomID);
- upper = Vec3fx(bounds.upper, primID);
- }
-
- __forceinline PrimRef (const BBox3fa& bounds, size_t id)
- {
-#if defined(__X86_64__) || defined(__aarch64__)
- lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF));
- upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF));
-#else
- lower = Vec3fx(bounds.lower, (unsigned)id);
- upper = Vec3fx(bounds.upper, (unsigned)0);
-#endif
- }
-
- /*! calculates twice the center of the primitive */
- __forceinline const Vec3fa center2() const {
- return lower+upper;
- }
-
- /*! return the bounding box of the primitive */
- __forceinline const BBox3fa bounds() const {
- return BBox3fa(lower,upper);
- }
-
- /*! size for bin heuristic is 1 */
- __forceinline unsigned size() const {
- return 1;
- }
-
- /*! returns bounds and centroid used for binning */
- __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
- {
- bounds_o = bounds();
- center_o = embree::center2(bounds_o);
- }
-
- __forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!!
- return lower.u;
- }
- __forceinline unsigned& primIDref() { // FIXME: remove !!!!!!!
- return upper.u;
- }
-
- /*! returns the geometry ID */
- __forceinline unsigned geomID() const {
- return lower.a;
- }
-
- /*! returns the primitive ID */
- __forceinline unsigned primID() const {
- return upper.a;
- }
-
- /*! returns an size_t sized ID */
- __forceinline size_t ID() const {
-#if defined(__X86_64__) || defined(__aarch64__)
- return size_t(lower.u) + (size_t(upper.u) << 32);
-#else
- return size_t(lower.u);
-#endif
- }
-
- /*! special function for operator< */
- __forceinline uint64_t ID64() const {
- return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
- }
-
- /*! allows sorting the primrefs by ID */
- friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) {
- return p0.ID64() < p1.ID64();
- }
-
- /*! Outputs primitive reference to a stream. */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) {
- return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }";
- }
-
- public:
- Vec3fx lower; //!< lower bounds and geomID
- Vec3fx upper; //!< upper bounds and primID
- };
-
- /*! fast exchange for PrimRefs */
- __forceinline void xchg(PrimRef& a, PrimRef& b)
- {
-#if defined(__AVX__)
- const vfloat8 aa = vfloat8::load((float*)&a);
- const vfloat8 bb = vfloat8::load((float*)&b);
- vfloat8::store((float*)&a,bb);
- vfloat8::store((float*)&b,aa);
-#else
- std::swap(a,b);
-#endif
- }
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- struct SubGridBuildData {
- unsigned short sx,sy;
- unsigned int primID;
-
- __forceinline SubGridBuildData() {};
- __forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID) : sx(sx), sy(sy), primID(primID) {};
-
- __forceinline size_t x() const { return (size_t)sx & 0x7fff; }
- __forceinline size_t y() const { return (size_t)sy & 0x7fff; }
-
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/primref_mb.h b/thirdparty/embree-aarch64/kernels/common/primref_mb.h
deleted file mode 100644
index b6c1ad5712..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/primref_mb.h
+++ /dev/null
@@ -1,262 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-#define MBLUR_BIN_LBBOX 1
-
-namespace embree
-{
-#if MBLUR_BIN_LBBOX
-
- /*! A primitive reference stores the bounds of the primitive and its ID. */
- struct PrimRefMB
- {
- typedef LBBox3fa BBox;
-
- __forceinline PrimRefMB () {}
-
- __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
- : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
- {
- assert(activeTimeSegments > 0);
- lbounds.bounds0.lower.a = geomID;
- lbounds.bounds0.upper.a = primID;
- lbounds.bounds1.lower.a = activeTimeSegments;
- lbounds.bounds1.upper.a = totalTimeSegments;
- }
-
- __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
- : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
- {
- assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
- lbounds.bounds0.lower.a = id & 0xFFFFFFFF;
- lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF;
-#else
- lbounds.bounds0.lower.a = id;
- lbounds.bounds0.upper.a = 0;
-#endif
- lbounds.bounds1.lower.a = activeTimeSegments;
- lbounds.bounds1.upper.a = totalTimeSegments;
- }
-
- __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
- : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
- {
- assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
- lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
- lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
-#else
- lbounds.bounds0.lower.u = id;
- lbounds.bounds0.upper.u = 0;
-#endif
- lbounds.bounds1.lower.a = activeTimeSegments;
- lbounds.bounds1.upper.a = totalTimeSegments;
- }
-
- /*! returns bounds for binning */
- __forceinline LBBox3fa bounds() const {
- return lbounds;
- }
-
- /*! returns the number of time segments of this primref */
- __forceinline unsigned size() const {
- return lbounds.bounds1.lower.a;
- }
-
- __forceinline unsigned totalTimeSegments() const {
- return lbounds.bounds1.upper.a;
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> timeSegmentRange(const BBox1f& range) const {
- return getTimeSegmentRange(range,time_range,float(totalTimeSegments()));
- }
-
- /* returns time that corresponds to time step */
- __forceinline float timeStep(const int i) const {
- assert(i>=0 && i<=(int)totalTimeSegments());
- return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments());
- }
-
- /*! checks if time range overlaps */
- __forceinline bool time_range_overlap(const BBox1f& range) const
- {
- if (0.9999f*time_range.upper <= range.lower) return false;
- if (1.0001f*time_range.lower >= range.upper) return false;
- return true;
- }
-
- /*! returns center for binning */
- __forceinline Vec3fa binCenter() const {
- return center2(lbounds.interpolate(0.5f));
- }
-
- /*! returns bounds and centroid used for binning */
- __forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const
- {
- bounds_o = bounds();
- center_o = binCenter();
- }
-
- /*! returns the geometry ID */
- __forceinline unsigned geomID() const {
- return lbounds.bounds0.lower.a;
- }
-
- /*! returns the primitive ID */
- __forceinline unsigned primID() const {
- return lbounds.bounds0.upper.a;
- }
-
- /*! returns an size_t sized ID */
- __forceinline size_t ID() const {
-#if defined(__X86_64__) || defined(__aarch64__)
- return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32);
-#else
- return size_t(lbounds.bounds0.lower.u);
-#endif
- }
-
- /*! special function for operator< */
- __forceinline uint64_t ID64() const {
- return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
- }
-
- /*! allows sorting the primrefs by ID */
- friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
- return p0.ID64() < p1.ID64();
- }
-
- /*! Outputs primitive reference to a stream. */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
- return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
- }
-
- public:
- LBBox3fx lbounds;
- BBox1f time_range; // entire geometry time range
- };
-
-#else
-
- /*! A primitive reference stores the bounds of the primitive and its ID. */
- struct __aligned(16) PrimRefMB
- {
- typedef BBox3fa BBox;
-
- __forceinline PrimRefMB () {}
-
- __forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
- : bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
- {
- assert(activeTimeSegments > 0);
- bbox.lower.a = geomID;
- bbox.upper.a = primID;
- }
-
- __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
- : bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
- {
- assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
- bbox.lower.u = id & 0xFFFFFFFF;
- bbox.upper.u = (id >> 32) & 0xFFFFFFFF;
-#else
- bbox.lower.u = id;
- bbox.upper.u = 0;
-#endif
- }
-
- /*! returns bounds for binning */
- __forceinline BBox3fa bounds() const {
- return bbox;
- }
-
- /*! returns the number of time segments of this primref */
- __forceinline unsigned int size() const {
- return _activeTimeSegments;
- }
-
- __forceinline unsigned int totalTimeSegments() const {
- return _totalTimeSegments;
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> timeSegmentRange(const BBox1f& range) const {
- return getTimeSegmentRange(range,time_range,float(_totalTimeSegments));
- }
-
- /* returns time that corresponds to time step */
- __forceinline float timeStep(const int i) const {
- assert(i>=0 && i<=(int)_totalTimeSegments);
- return time_range.lower + time_range.size()*float(i)/float(_totalTimeSegments);
- }
-
- /*! checks if time range overlaps */
- __forceinline bool time_range_overlap(const BBox1f& range) const
- {
- if (0.9999f*time_range.upper <= range.lower) return false;
- if (1.0001f*time_range.lower >= range.upper) return false;
- return true;
- }
-
- /*! returns center for binning */
- __forceinline Vec3fa binCenter() const {
- return center2(bounds());
- }
-
- /*! returns bounds and centroid used for binning */
- __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
- {
- bounds_o = bounds();
- center_o = center2(bounds());
- }
-
- /*! returns the geometry ID */
- __forceinline unsigned int geomID() const {
- return bbox.lower.a;
- }
-
- /*! returns the primitive ID */
- __forceinline unsigned int primID() const {
- return bbox.upper.a;
- }
-
- /*! returns an size_t sized ID */
- __forceinline size_t ID() const {
-#if defined(__X86_64__) || defined(__aarch64__)
- return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32);
-#else
- return size_t(bbox.lower.u);
-#endif
- }
-
- /*! special function for operator< */
- __forceinline uint64_t ID64() const {
- return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
- }
-
- /*! allows sorting the primrefs by ID */
- friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
- return p0.ID64() < p1.ID64();
- }
-
- /*! Outputs primitive reference to a stream. */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
- return cout << "{ bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
- }
-
- public:
- BBox3fa bbox; // bounds, geomID, primID
- unsigned int _activeTimeSegments;
- unsigned int _totalTimeSegments;
- BBox1f time_range; // entire geometry time range
- };
-
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/profile.h b/thirdparty/embree-aarch64/kernels/common/profile.h
deleted file mode 100644
index a7de36414d..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/profile.h
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! helper structure for the implementation of the profile functions below */
- struct ProfileTimer
- {
- static const size_t N = 20;
-
- ProfileTimer () {}
-
- ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
- {
- for (size_t i=0; i<N; i++) names[i] = nullptr;
- for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
- for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
- for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
- for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
- }
-
- __forceinline void begin()
- {
- j=0;
- t0 = tj = getSeconds();
- }
-
- __forceinline void end() {
- absolute("total");
- i++;
- }
-
- __forceinline void operator() (const char* name) {
- relative(name);
- }
-
- __forceinline void absolute (const char* name)
- {
- const double t1 = getSeconds();
- const double dt = t1-t0;
- assert(names[j] == nullptr || names[j] == name);
- names[j] = name;
- if (i == 0) dt_fst[j] = dt;
- if (i>=numSkip) {
- dt_min[j] = min(dt_min[j],dt);
- dt_avg[j] = dt_avg[j] + dt;
- dt_max[j] = max(dt_max[j],dt);
- }
- j++;
- maxJ = max(maxJ,j);
- }
-
- __forceinline void relative (const char* name)
- {
- const double t1 = getSeconds();
- const double dt = t1-tj;
- tj = t1;
- assert(names[j] == nullptr || names[j] == name);
- names[j] = name;
- if (i == 0) dt_fst[j] = dt;
- if (i>=numSkip) {
- dt_min[j] = min(dt_min[j],dt);
- dt_avg[j] = dt_avg[j] + dt;
- dt_max[j] = max(dt_max[j],dt);
- }
- j++;
- maxJ = max(maxJ,j);
- }
-
- void print(size_t numElements)
- {
- for (size_t k=0; k<N; k++)
- dt_avg[k] /= double(i-numSkip);
-
- printf(" profile [M/s]:\n");
- for (size_t j=0; j<maxJ; j++)
- printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
- names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
-
- printf(" profile [ms]:\n");
- for (size_t j=0; j<maxJ; j++)
- printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
- names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
- }
-
- void print()
- {
- printf(" profile:\n");
-
- for (size_t k=0; k<N; k++)
- dt_avg[k] /= double(i-numSkip);
-
- for (size_t j=0; j<maxJ; j++) {
- printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
- names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
- }
- }
-
- double avg() {
- return dt_avg[maxJ-1]/double(i-numSkip);
- }
-
- private:
- size_t i;
- size_t j;
- size_t maxJ;
- size_t numSkip;
- double t0;
- double tj;
- const char* names[N];
- double dt_fst[N];
- double dt_min[N];
- double dt_avg[N];
- double dt_max[N];
- };
-
- /*! This function executes some code block multiple times and measured sections of it.
- Use the following way:
-
- profile(1,10,1000,[&](ProfileTimer& timer) {
- // code
- timer("A");
- // code
- timer("B");
- });
- */
- template<typename Closure>
- void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
- {
- ProfileTimer timer(numSkip);
-
- for (size_t i=0; i<numSkip+numIter; i++)
- {
- timer.begin();
- closure(timer);
- timer.end();
- }
- timer.print(numElements);
- }
-
- /*! similar as the function above, but the timer object comes externally */
- template<typename Closure>
- void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
- {
- timer = ProfileTimer(numSkip);
-
- for (size_t i=0; i<numSkip+numIter; i++)
- {
- timer.begin();
- closure(timer);
- timer.end();
- }
- timer.print(numElements);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/ray.h b/thirdparty/embree-aarch64/kernels/common/ray.h
deleted file mode 100644
index 336d48942c..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/ray.h
+++ /dev/null
@@ -1,1517 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "instance_stack.h"
-
-// FIXME: if ray gets seperated into ray* and hit, uload4 needs to be adjusted
-
-namespace embree
-{
- static const size_t MAX_INTERNAL_STREAM_SIZE = 32;
-
- /* Ray structure for K rays */
- template<int K>
- struct RayK
- {
- /* Default construction does nothing */
- __forceinline RayK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
- const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
- const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
- : org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}
-
- /* Returns the size of the ray */
- static __forceinline size_t size() { return K; }
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline vbool<K> valid() const
- {
- const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);
- const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);
- return vx & vy & vz & vn & vf;
- }
-
- __forceinline void get(RayK<1>* ray) const;
- __forceinline void get(size_t i, RayK<1>& ray) const;
- __forceinline void set(const RayK<1>* ray);
- __forceinline void set(size_t i, const RayK<1>& ray);
-
- __forceinline void copy(size_t dest, size_t source);
-
- __forceinline vint<K> octant() const
- {
- return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |
- select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |
- select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));
- }
-
- /* Ray data */
- Vec3vf<K> org; // ray origin
- vfloat<K> _tnear; // start of ray segment
- Vec3vf<K> dir; // ray direction
- vfloat<K> _time; // time of this ray for motion blur
- vfloat<K> tfar; // end of ray segment
- vint<K> mask; // used to mask out objects during traversal
- vint<K> id;
- vint<K> flags;
-
- __forceinline vfloat<K>& tnear() { return _tnear; }
- __forceinline vfloat<K>& time() { return _time; }
- __forceinline const vfloat<K>& tnear() const { return _tnear; }
- __forceinline const vfloat<K>& time() const { return _time; }
- };
-
- /* Ray+hit structure for K rays */
- template<int K>
- struct RayHitK : RayK<K>
- {
- using RayK<K>::org;
- using RayK<K>::_tnear;
- using RayK<K>::dir;
- using RayK<K>::_time;
- using RayK<K>::tfar;
- using RayK<K>::mask;
- using RayK<K>::id;
- using RayK<K>::flags;
-
- using RayK<K>::tnear;
- using RayK<K>::time;
-
- /* Default construction does nothing */
- __forceinline RayHitK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
- const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
- const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
- : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
- geomID(RTC_INVALID_GEOMETRY_ID)
- {
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = RTC_INVALID_GEOMETRY_ID;
- }
-
- __forceinline RayHitK(const RayK<K>& ray)
- : RayK<K>(ray),
- geomID(RTC_INVALID_GEOMETRY_ID)
- {
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = RTC_INVALID_GEOMETRY_ID;
- }
-
- __forceinline RayHitK<K>& operator =(const RayK<K>& ray)
- {
- org = ray.org;
- _tnear = ray._tnear;
- dir = ray.dir;
- _time = ray._time;
- tfar = ray.tfar;
- mask = ray.mask;
- id = ray.id;
- flags = ray.flags;
-
- geomID = RTC_INVALID_GEOMETRY_ID;
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = RTC_INVALID_GEOMETRY_ID;
-
- return *this;
- }
-
- /* Calculates if the hit is valid */
- __forceinline void verifyHit(const vbool<K>& valid0) const
- {
- vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
- const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
- const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
- const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
- const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
- if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
- if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
- if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
- if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
- if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
- if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
- }
-
- __forceinline void get(RayHitK<1>* ray) const;
- __forceinline void get(size_t i, RayHitK<1>& ray) const;
- __forceinline void set(const RayHitK<1>* ray);
- __forceinline void set(size_t i, const RayHitK<1>& ray);
-
- __forceinline void copy(size_t dest, size_t source);
-
- /* Hit data */
- Vec3vf<K> Ng; // geometry normal
- vfloat<K> u; // barycentric u coordinate of hit
- vfloat<K> v; // barycentric v coordinate of hit
- vuint<K> primID; // primitive ID
- vuint<K> geomID; // geometry ID
- vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Specialization for a single ray */
- template<>
- struct RayK<1>
- {
- /* Default construction does nothing */
- __forceinline RayK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
- : org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline bool valid() const {
- return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);
- }
-
- /* Ray data */
- Vec3ff org; // 3 floats for ray origin, 1 float for tnear
- //float tnear; // start of ray segment
- Vec3ff dir; // 3 floats for ray direction, 1 float for time
- // float time;
- float tfar; // end of ray segment
- int mask; // used to mask out objects during traversal
- int id; // ray ID
- int flags; // ray flags
-
- __forceinline float& tnear() { return org.w; };
- __forceinline const float& tnear() const { return org.w; };
-
- __forceinline float& time() { return dir.w; };
- __forceinline const float& time() const { return dir.w; };
-
- };
-
- template<>
- struct RayHitK<1> : RayK<1>
- {
- /* Default construction does nothing */
- __forceinline RayHitK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
- : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
- geomID(RTC_INVALID_GEOMETRY_ID) {}
-
- __forceinline RayHitK(const RayK<1>& ray)
- : RayK<1>(ray),
- geomID(RTC_INVALID_GEOMETRY_ID) {}
-
- __forceinline RayHitK<1>& operator =(const RayK<1>& ray)
- {
- org = ray.org;
- dir = ray.dir;
- tfar = ray.tfar;
- mask = ray.mask;
- id = ray.id;
- flags = ray.flags;
-
- geomID = RTC_INVALID_GEOMETRY_ID;
-
- return *this;
- }
-
- /* Calculates if the hit is valid */
- __forceinline void verifyHit() const
- {
- if (geomID == RTC_INVALID_GEOMETRY_ID) return;
- const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
- const bool vu = (abs(u) <= FLT_LARGE);
- const bool vv = (abs(u) <= FLT_LARGE);
- const bool vnx = abs(Ng.x) <= FLT_LARGE;
- const bool vny = abs(Ng.y) <= FLT_LARGE;
- const bool vnz = abs(Ng.z) <= FLT_LARGE;
- if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
- if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
- if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
- if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
- if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
- if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
- }
-
- /* Hit data */
- Vec3f Ng; // not normalized geometry normal
- float u; // barycentric u coordinate of hit
- float v; // barycentric v coordinate of hit
- unsigned int primID; // primitive ID
- unsigned int geomID; // geometry ID
- unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Converts ray packet to single rays */
- template<int K>
- __forceinline void RayK<K>::get(RayK<1>* ray) const
- {
- for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
- {
- ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];
- ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];
- ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];
- }
- }
-
- template<int K>
- __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
- {
- // FIXME: use SIMD transpose
- for (size_t i = 0; i < K; i++)
- get(i, ray[i]);
- }
-
- /* Extracts a single ray out of a ray packet*/
- template<int K>
- __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
- {
- ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
- ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];
- ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
- }
-
- template<int K>
- __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
- {
- ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
- ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];
- ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
- ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];
- ray.u = u[i]; ray.v = v[i];
- ray.primID = primID[i]; ray.geomID = geomID[i];
-
- instance_id_stack::copy(instID, ray.instID, i);
- }
-
- /* Converts single rays to ray packet */
- template<int K>
- __forceinline void RayK<K>::set(const RayK<1>* ray)
- {
- // FIXME: use SIMD transpose
- for (size_t i = 0; i < K; i++)
- set(i, ray[i]);
- }
-
- template<int K>
- __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
- {
- // FIXME: use SIMD transpose
- for (size_t i = 0; i < K; i++)
- set(i, ray[i]);
- }
-
- /* inserts a single ray into a ray packet element */
- template<int K>
- __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
- {
- org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
- dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
- tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
- }
-
- template<int K>
- __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
- {
- org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
- dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
- tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
- Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;
- u[i] = ray.u; v[i] = ray.v;
- primID[i] = ray.primID; geomID[i] = ray.geomID;
-
- instance_id_stack::copy(ray.instID, instID, i);
- }
-
- /* copies a ray packet element into another element*/
- template<int K>
- __forceinline void RayK<K>::copy(size_t dest, size_t source)
- {
- org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
- dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
- tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
- }
-
- template<int K>
- __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
- {
- org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
- dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
- tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
- Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];
- u[dest] = u[source]; v[dest] = v[source];
- primID[dest] = primID[source]; geomID[dest] = geomID[source];
-
- instance_id_stack::copy(instID, instID, source, dest);
- }
-
- /* Shortcuts */
- typedef RayK<1> Ray;
- typedef RayK<4> Ray4;
- typedef RayK<8> Ray8;
- typedef RayK<16> Ray16;
- struct RayN;
-
- typedef RayHitK<1> RayHit;
- typedef RayHitK<4> RayHit4;
- typedef RayHitK<8> RayHit8;
- typedef RayHitK<16> RayHit16;
- struct RayHitN;
-
- template<int K, bool intersect>
- struct RayTypeHelper;
-
- template<int K>
- struct RayTypeHelper<K, true>
- {
- typedef RayHitK<K> Ty;
- };
-
- template<int K>
- struct RayTypeHelper<K, false>
- {
- typedef RayK<K> Ty;
- };
-
- template<bool intersect>
- using RayType = typename RayTypeHelper<1, intersect>::Ty;
-
- template<int K, bool intersect>
- using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
-
- /* Outputs ray to stream */
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
- {
- return cout << "{ " << embree_endl
- << " org = " << ray.org << embree_endl
- << " dir = " << ray.dir << embree_endl
- << " near = " << ray.tnear() << embree_endl
- << " far = " << ray.tfar << embree_endl
- << " time = " << ray.time() << embree_endl
- << " mask = " << ray.mask << embree_endl
- << " id = " << ray.id << embree_endl
- << " flags = " << ray.flags << embree_endl
- << "}";
- }
-
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
- {
- cout << "{ " << embree_endl
- << " org = " << ray.org << embree_endl
- << " dir = " << ray.dir << embree_endl
- << " near = " << ray.tnear() << embree_endl
- << " far = " << ray.tfar << embree_endl
- << " time = " << ray.time() << embree_endl
- << " mask = " << ray.mask << embree_endl
- << " id = " << ray.id << embree_endl
- << " flags = " << ray.flags << embree_endl
- << " Ng = " << ray.Ng
- << " u = " << ray.u << embree_endl
- << " v = " << ray.v << embree_endl
- << " primID = " << ray.primID << embree_endl
- << " geomID = " << ray.geomID << embree_endl
- << " instID =";
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- {
- cout << " " << ray.instID[l];
- }
- cout << embree_endl;
- return cout << "}";
- }
-
- struct RayStreamSOA
- {
- __forceinline RayStreamSOA(void* rays, size_t N)
- : ptr((char*)rays), N(N) {}
-
- /* ray data access functions */
- __forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin
- __forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin
- __forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin
- __forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment
-
- __forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction
- __forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction
- __forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction
- __forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur
-
- __forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance)
- __forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)
- __forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id
- __forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags
-
- /* hit data access functions */
- __forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal
- __forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal
- __forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal
-
- __forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit
- __forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit
-
- __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID
- __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID
- __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID
-
- __forceinline Ray getRayByOffset(size_t offset)
- {
- Ray ray;
- ray.org.x = org_x(offset)[0];
- ray.org.y = org_y(offset)[0];
- ray.org.z = org_z(offset)[0];
- ray.tnear() = tnear(offset)[0];
- ray.dir.x = dir_x(offset)[0];
- ray.dir.y = dir_y(offset)[0];
- ray.dir.z = dir_z(offset)[0];
- ray.time() = time(offset)[0];
- ray.tfar = tfar(offset)[0];
- ray.mask = mask(offset)[0];
- ray.id = id(offset)[0];
- ray.flags = flags(offset)[0];
- return ray;
- }
-
- template<int K>
- __forceinline RayK<K> getRayByOffset(size_t offset)
- {
- RayK<K> ray;
- ray.org.x = vfloat<K>::loadu(org_x(offset));
- ray.org.y = vfloat<K>::loadu(org_y(offset));
- ray.org.z = vfloat<K>::loadu(org_z(offset));
- ray.tnear = vfloat<K>::loadu(tnear(offset));
- ray.dir.x = vfloat<K>::loadu(dir_x(offset));
- ray.dir.y = vfloat<K>::loadu(dir_y(offset));
- ray.dir.z = vfloat<K>::loadu(dir_z(offset));
- ray.time = vfloat<K>::loadu(time(offset));
- ray.tfar = vfloat<K>::loadu(tfar(offset));
- ray.mask = vint<K>::loadu(mask(offset));
- ray.id = vint<K>::loadu(id(offset));
- ray.flags = vint<K>::loadu(flags(offset));
- return ray;
- }
-
- template<int K> // packet load of K rays starting at `offset`, masked by `valid`
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
- {
- RayK<K> ray;
- ray.org.x = vfloat<K>::loadu(valid, org_x(offset));
- ray.org.y = vfloat<K>::loadu(valid, org_y(offset));
- ray.org.z = vfloat<K>::loadu(valid, org_z(offset));
- ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
- ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));
- ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));
- ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));
- ray.time() = vfloat<K>::loadu(valid, time(offset));
- ray.tfar = vfloat<K>::loadu(valid, tfar(offset));
- // mask, id and flags come last; a build without AVX takes the per-lane path below when any lane is disabled
-#if !defined(__AVX__)
- /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
- because the SSE masked loads always access the entire vector */
- if (unlikely(!all(valid)))
- {
- ray.mask = zero; // disabled lanes keep this zero value
- ray.id = zero;
- ray.flags = zero;
- // copy the enabled lanes one element at a time
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- ray.mask[k] = mask(offset)[k];
- ray.id[k] = id(offset)[k];
- ray.flags[k] = flags(offset)[k];
- }
- }
- }
- else // binds to the block after #endif; when __AVX__ is defined that block runs unconditionally
-#endif
- {
- ray.mask = vint<K>::loadu(valid, mask(offset));
- ray.id = vint<K>::loadu(valid, id(offset));
- ray.flags = vint<K>::loadu(valid, flags(offset));
- }
- // hand back the assembled packet
- return ray;
- }
-
- template<int K> // writes the hit fields of a packet back to the K rays starting at `offset`
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
- {
- /*
- * valid_i: stores which of the input rays exist (do not access nonexistent rays!)
- * valid: stores which of the rays actually hit something.
- */
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
- // nothing is written when no existing ray has a valid geomID
- if (likely(any(valid)))
- {
- vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
- vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
- vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
- vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
- vfloat<K>::storeu(valid, u(offset), ray.u);
- vfloat<K>::storeu(valid, v(offset), ray.v);
- // the ID arrays follow; a build without AVX stores them lane by lane when the packet is only partially populated
-#if !defined(__AVX__)
- /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
- because the SSE masked stores always access the entire vector */
- if (unlikely(!all(valid_i))) // tests valid_i (rays that exist), not valid (rays that hit)
- {
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- primID(offset)[k] = ray.primID[k];
- geomID(offset)[k] = ray.geomID[k];
- // level 0 is always written; deeper levels only while the previous level holds a valid ID
- instID(0, offset)[k] = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- instID(l, offset)[k] = ray.instID[l][k];
-#endif
- }
- }
- }
- else // binds to the block after #endif; when __AVX__ is defined that block runs unconditionally
-#endif
- {
- vuint<K>::storeu(valid, primID(offset), ray.primID);
- vuint<K>::storeu(valid, geomID(offset), ray.geomID);
- // level 0 is always stored; deeper levels only while some hit lane has a valid ID on the previous level
- vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
-#endif
- }
- }
- }
-
- /* Stores tfar for the lanes that are enabled in valid_i and whose tfar is negative;
-    no other ray field is written. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
- {
-   vbool<K> active = valid_i;
-   active &= (ray.tfar < 0.0f);
-
-   if (unlikely(!any(active)))
-     return;
-
-   vfloat<K>::storeu(active, tfar(offset), ray.tfar);
- }
-
- /* Returns a 3-bit octant index for the ray direction at `offset`:
-    bit 0 is set for a negative x, bit 1 for a negative y, bit 2 for a negative z. */
- __forceinline size_t getOctantByOffset(size_t offset)
- {
-   size_t octant = 0;
-   if (dir_x(offset)[0] < 0.0f) octant |= 1;
-   if (dir_y(offset)[0] < 0.0f) octant |= 2;
-   if (dir_z(offset)[0] < 0.0f) octant |= 4;
-   return octant;
- }
-
- /* A ray counts as valid when its tnear does not exceed its tfar. */
- __forceinline bool isValidByOffset(size_t offset)
- {
-   return tnear(offset)[0] <= tfar(offset)[0];
- }
-
- template<int K> // gathers K rays, each lane addressed by its own entry of `offset`; only lanes set in `valid` are requested
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
- {
- RayK<K> ray;
- // AVX2 builds use masked gathers; other builds fall back to a per-lane scalar loop
-#if defined(__AVX2__)
- ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
- ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
- ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
- ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
- ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
- ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
- ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
- ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
- ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
- ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
- ray.id = vint<K>::template gather<1>(valid, id(), offset);
- ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
-#else
- ray.org = zero; // every field starts at zero so that disabled lanes hold a defined value
- ray.tnear() = zero;
- ray.dir = zero;
- ray.time() = zero;
- ray.tfar = zero;
- ray.mask = zero;
- ray.id = zero;
- ray.flags = zero;
- // copy the enabled lanes one ray at a time
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- const size_t ofs = offset[k];
- // ofs is passed to the same accessors that the size_t overloads use
- ray.org.x[k] = *org_x(ofs);
- ray.org.y[k] = *org_y(ofs);
- ray.org.z[k] = *org_z(ofs);
- ray.tnear()[k] = *tnear(ofs);
- ray.dir.x[k] = *dir_x(ofs);
- ray.dir.y[k] = *dir_y(ofs);
- ray.dir.z[k] = *dir_z(ofs);
- ray.time()[k] = *time(ofs);
- ray.tfar[k] = *tfar(ofs);
- ray.mask[k] = *mask(ofs);
- ray.id[k] = *id(ofs);
- ray.flags[k] = *flags(ofs);
- }
- }
-#endif
- // hand back the assembled packet
- return ray;
- }
-
- template<int K> // scatters the hit fields of a packet back to rays addressed per lane by `offset`
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); // keep only the lanes whose geomID is valid
- // nothing is written when no lane remains
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
- vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
- vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
- vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
- vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
- vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
- vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
- vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
- // level 0 is always scattered; deeper levels only while some hit lane has a valid ID on the previous level
- vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
-#endif
-#else
- size_t valid_bits = movemask(valid); // one bit per lane that has to be written
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits); // next lane to write; the loop relies on bscf updating valid_bits
- const size_t ofs = offset[k];
- // store this lane's hit fields through the per-component accessors
- *tfar(ofs) = ray.tfar[k];
-
- *Ng_x(ofs) = ray.Ng.x[k];
- *Ng_y(ofs) = ray.Ng.y[k];
- *Ng_z(ofs) = ray.Ng.z[k];
- *u(ofs) = ray.u[k];
- *v(ofs) = ray.v[k];
- *primID(ofs) = ray.primID[k];
- *geomID(ofs) = ray.geomID[k];
- // level 0 is always written; deeper levels only while the previous level holds a valid ID
- *instID(0, ofs) = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- *instID(l, ofs) = ray.instID[l][k];
-#endif
- }
-#endif
- }
- }
-
- /* Stores tfar for rays addressed per lane by `offset`, restricted to the lanes that are
-    enabled in valid_i and whose tfar is negative. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
- {
-   vbool<K> active = valid_i;
-   active &= (ray.tfar < 0.0f);
-
-   if (unlikely(!any(active)))
-     return;
-
-#if defined(__AVX512F__)
-   vfloat<K>::template scatter<1>(active, tfar(), offset, ray.tfar);
-#else
-   /* without AVX-512: visit the set bits of the lane mask and store one lane at a time */
-   size_t pending = movemask(active);
-   while (pending != 0)
-   {
-     const size_t lane = bscf(pending);
-     const size_t lane_offset = offset[lane];
-     *tfar(lane_offset) = ray.tfar[lane];
-   }
-#endif
- }
-
- char* __restrict__ ptr;
- size_t N;
- };
-
- /* RayStreamSOA whose storage is the `data` array embedded in the object itself.
-    The constructor hands `data` and the ray count K to the base class and asserts K <= MAX_K. */
- template<size_t MAX_K>
- struct StackRayStreamSOA : public RayStreamSOA
- {
-   __forceinline StackRayStreamSOA(size_t K)
-     : RayStreamSOA(data, K) { assert(K <= MAX_K); }
-
-   /* Round up to whole RayHit4 packets. Plain MAX_K / 4 truncates, which under-sizes the
-      buffer whenever MAX_K is not a multiple of 4; for multiples of 4 the size is unchanged.
-      NOTE(review): assumes one RayHit4 provides the storage for four rays - confirm. */
-   char data[(MAX_K + 3) / 4 * sizeof(RayHit4)];
- };
-
-
- struct RayStreamSOP // ray stream held as one pointer per component; rays are addressed by byte offset from each pointer
- {
- template<class T> // points every component pointer at the matching member of `t`
- __forceinline void init(T& t)
- {
- org_x = (float*)&t.org.x;
- org_y = (float*)&t.org.y;
- org_z = (float*)&t.org.z;
- tnear = (float*)&t.tnear;
- dir_x = (float*)&t.dir.x;
- dir_y = (float*)&t.dir.y;
- dir_z = (float*)&t.dir.z;
- time = (float*)&t.time;
- tfar = (float*)&t.tfar;
- mask = (unsigned int*)&t.mask;
- id = (unsigned int*)&t.id;
- flags = (unsigned int*)&t.flags;
- // hit components
- Ng_x = (float*)&t.Ng.x;
- Ng_y = (float*)&t.Ng.y;
- Ng_z = (float*)&t.Ng.z;
- u = (float*)&t.u;
- v = (float*)&t.v;
- primID = (unsigned int*)&t.primID;
- geomID = (unsigned int*)&t.geomID;
- // one pointer per instancing level
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = (unsigned int*)&t.instID[l];
- }
- // loads the single ray located `offset` bytes past each component pointer
- __forceinline Ray getRayByOffset(size_t offset)
- {
- Ray ray;
- ray.org.x = *(float* __restrict__)((char*)org_x + offset);
- ray.org.y = *(float* __restrict__)((char*)org_y + offset);
- ray.org.z = *(float* __restrict__)((char*)org_z + offset);
- ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
- ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
- ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
- ray.tfar = *(float* __restrict__)((char*)tfar + offset);
- ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f; // null pointer: fall back to 0
- ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f; // null pointer: fall back to 0
- ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1; // null pointer: fall back to -1
- ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
- ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
- return ray;
- }
- // packet load of K rays starting `offset` bytes past each component pointer, masked by `valid`
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
- {
- RayK<K> ray;
- ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
- ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
- ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
- ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
- ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
- ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
- ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
- ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f; // same fallbacks as the scalar overload
- ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
- ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
- ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
- ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
- return ray;
- }
- // loads only the direction components of K rays, masked by `valid`
- template<int K>
- __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
- {
- Vec3vf<K> dir;
- dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
- dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
- dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
- return dir;
- }
- // writes the hit of a single ray back to the stream; does nothing when geomID is invalid
- __forceinline void setHitByOffset(size_t offset, const RayHit& ray)
- {
- if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
- {
- *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
- // the normal components are written only when their pointers are non-null
- if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
- if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
- if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
- *(float* __restrict__)((char*)u + offset) = ray.u;
- *(float* __restrict__)((char*)v + offset) = ray.v;
- *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
- *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
- // only instID[0] is null-checked; deeper levels are written while the previous level holds a valid ID
- if (likely(instID[0])) {
- *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l)
- *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
-#endif
- }
- }
- }
- // Ray overload: stores tfar unconditionally
- __forceinline void setHitByOffset(size_t offset, const Ray& ray)
- {
- *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
- }
- // packet write-back of hit data for the lanes enabled in valid_i whose geomID is valid
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
- // nothing is written when no lane remains
- if (likely(any(valid)))
- {
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
- // the normal components are stored only when their pointers are non-null
- if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
- if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
- if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
- // deeper instance levels are stored while some hit lane has a valid ID on the previous level
- if (likely(instID[0])) {
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
-#endif
- }
- }
- }
- // RayK overload: stores tfar for the lanes enabled in valid_i whose tfar is negative
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.tfar < 0.0f);
-
- if (likely(any(valid)))
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
- }
- // 3-bit octant index of the direction at `offset`: bit 0 = negative x, bit 1 = negative y, bit 2 = negative z
- __forceinline size_t getOctantByOffset(size_t offset)
- {
- const float dx = *(float* __restrict__)((char*)dir_x + offset);
- const float dy = *(float* __restrict__)((char*)dir_y + offset);
- const float dz = *(float* __restrict__)((char*)dir_z + offset);
- const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
- return octantID;
- }
- // true when tnear (0 if the tnear pointer is null) does not exceed tfar
- __forceinline bool isValidByOffset(size_t offset)
- {
- const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
- const float ffar = *(float* __restrict__)((char*)tfar + offset);
- return nnear <= ffar;
- }
- // packet form of the test above; the result is not combined with `valid` here
- template<int K>
- __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
- {
- const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
- const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
- return nnear <= ffar;
- }
- // gathers K rays, each lane addressed by its own byte offset
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
- {
- RayK<K> ray;
- // AVX2 builds use masked gathers; other builds fall back to a per-lane scalar loop
-#if defined(__AVX2__)
- ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
- ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
- ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
- ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
- ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
- ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
- ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
- ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
- ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
- ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
- ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
- ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
-#else
- ray.org = zero; // every field starts at zero so that disabled lanes hold a defined value
- ray.tnear() = zero;
- ray.dir = zero;
- ray.tfar = zero;
- ray.time() = zero;
- ray.mask = zero;
- ray.id = zero;
- ray.flags = zero;
- // copy the enabled lanes one ray at a time
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- const size_t ofs = offset[k];
- // null optional pointers fall back to 0.0f and -1, as in the scalar overload
- ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
- ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
- ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
- ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
- ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
- ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
- ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
- ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
- ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
- ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
- ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
- ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
- }
- }
-#endif
- // hand back the assembled packet
- return ray;
- }
- // scatters the hit fields of a packet to rays addressed per lane by byte offset
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
- // nothing is written when no lane remains
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
- // the normal components are scattered only when their pointers are non-null
- if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
- if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
- if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
- vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
- vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
- vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
- vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
- // deeper instance levels are scattered while some hit lane has a valid ID on the previous level
- if (likely(instID[0])) {
- vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
-#endif
- }
-#else
- size_t valid_bits = movemask(valid); // one bit per lane that has to be written
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits); // next lane to write; the loop relies on bscf updating valid_bits
- const size_t ofs = offset[k];
- // store this lane's hit fields at its byte offset
- *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
- // the normal components are written only when their pointers are non-null
- if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
- if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
- if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
- *(float* __restrict__)((char*)u + ofs) = ray.u[k];
- *(float* __restrict__)((char*)v + ofs) = ray.v[k];
- *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
- *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
- // deeper instance levels are written while the previous level holds a valid ID
- if (likely(instID[0])) {
- *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
-#endif
- }
- }
-#endif
- }
- }
- // RayK overload: writes tfar for the lanes enabled in valid_i whose tfar is negative
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.tfar < 0.0f);
- // nothing is written when no lane remains
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
-#else
- size_t valid_bits = movemask(valid);
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits);
- const size_t ofs = offset[k];
- // store this lane's tfar at its byte offset
- *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
- }
-#endif
- }
- }
-
- /* ray data */
- float* __restrict__ org_x; // x coordinate of ray origin
- float* __restrict__ org_y; // y coordinate of ray origin
- float* __restrict__ org_z; // z coordinate of ray origin
- float* __restrict__ tnear; // start of ray segment (optional)
-
- float* __restrict__ dir_x; // x coordinate of ray direction
- float* __restrict__ dir_y; // y coordinate of ray direction
- float* __restrict__ dir_z; // z coordinate of ray direction
- float* __restrict__ time; // time of this ray for motion blur (optional)
-
- float* __restrict__ tfar; // end of ray segment (set to hit distance)
- unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)
- unsigned int* __restrict__ id; // ray ID
- unsigned int* __restrict__ flags; // ray flags
-
- /* hit data */
- float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)
- float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)
- float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)
-
- float* __restrict__ u; // barycentric u coordinate of hit
- float* __restrict__ v; // barycentric v coordinate of hit
-
- unsigned int* __restrict__ primID; // primitive ID
- unsigned int* __restrict__ geomID; // geometry ID
- unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID (optional)
- };
-
-
- /* Ray stream stored as an array of structures: `ptr` addresses the first ray and
-    every other ray is located by a byte offset from it. */
- struct RayStreamAOS
- {
-   __forceinline RayStreamAOS(void* rays)
-     : ptr((Ray*)rays) {}
-
-   /* Returns a reference to the ray located `offset` bytes after `ptr`. */
-   __forceinline Ray& getRayByOffset(size_t offset)
-   {
-     return *(Ray*)((char*)ptr + offset);
-   }
-
-   /* Unmasked gather of K rays by per-lane byte offset; only declared here. */
-   template<int K>
-   __forceinline RayK<K> getRayByOffset(const vint<K>& offset);
-
-   /* Masked gather: disabled lanes are redirected to offset 0, i.e. to the ray at `ptr`,
-      before the unmasked overload is called. */
-   template<int K>
-   __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
-   {
-     /* vint<K>(zero) instead of vintx(zero): the fallback operand must have the same
-        width as `offset` for every K, not only for the native vector width. */
-     const vint<K> valid_offset = select(valid, offset, vint<K>(zero));
-     return getRayByOffset(valid_offset);
-   }
-
-   /* Writes the hit fields back to the rays addressed by `offset`, for the lanes that are
-      enabled in valid_i and whose geomID is valid. The stored rays are accessed as RayHit. */
-   template<int K>
-   __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
-
-     if (likely(any(valid)))
-     {
-#if defined(__AVX512F__)
-       vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
-
-       /* level 0 is always scattered; deeper levels only while some hit lane has a
-          valid ID on the previous level */
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
-       for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
-         vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
-#endif
-#else
-       /* without AVX-512: visit the set bits of the lane mask and store one ray at a time */
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
-         ray_k->tfar   = ray.tfar[k];
-         ray_k->Ng.x   = ray.Ng.x[k];
-         ray_k->Ng.y   = ray.Ng.y[k];
-         ray_k->Ng.z   = ray.Ng.z[k];
-         ray_k->u      = ray.u[k];
-         ray_k->v      = ray.v[k];
-         ray_k->primID = ray.primID[k];
-         ray_k->geomID = ray.geomID[k];
-
-         /* presumably copies lane k of the instance ID stack - helper is defined elsewhere */
-         instance_id_stack::copy(ray.instID, ray_k->instID, k);
-       }
-#endif
-     }
-   }
-
-   /* RayK overload: writes only tfar, for the lanes that are enabled in valid_i and
-      whose tfar is negative. */
-   template<int K>
-   __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.tfar < 0.0f);
-
-     if (likely(any(valid)))
-     {
-#if defined(__AVX512F__)
-       vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
-#else
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
-         ray_k->tfar = ray.tfar[k];
-       }
-#endif
-     }
-   }
-
-   Ray* __restrict__ ptr; // first ray of the stream
- };
-
- /* 4-wide gather: for each of the four addressed rays, loads three groups of four values
-    (starting at org, dir and tfar) and transposes them into the packet layout. */
- template<>
- __forceinline Ray4 RayStreamAOS::getRayByOffset(const vint4& offset)
- {
-   /* resolve the four byte offsets into ray pointers once */
-   Ray* const r0 = (Ray*)((char*)ptr + offset[0]);
-   Ray* const r1 = (Ray*)((char*)ptr + offset[1]);
-   Ray* const r2 = (Ray*)((char*)ptr + offset[2]);
-   Ray* const r3 = (Ray*)((char*)ptr + offset[3]);
-
-   Ray4 ray;
-
-   /* org.x, org.y, org.z, tnear */
-   const vfloat4 org0 = vfloat4::loadu(&r0->org);
-   const vfloat4 org1 = vfloat4::loadu(&r1->org);
-   const vfloat4 org2 = vfloat4::loadu(&r2->org);
-   const vfloat4 org3 = vfloat4::loadu(&r3->org);
-   transpose(org0,org1,org2,org3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
-
-   /* dir.x, dir.y, dir.z, time */
-   const vfloat4 dir0 = vfloat4::loadu(&r0->dir);
-   const vfloat4 dir1 = vfloat4::loadu(&r1->dir);
-   const vfloat4 dir2 = vfloat4::loadu(&r2->dir);
-   const vfloat4 dir3 = vfloat4::loadu(&r3->dir);
-   transpose(dir0,dir1,dir2,dir3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
-   /* tfar, mask, id, flags: transposed as floats, the integer fields are reinterpreted afterwards */
-   const vfloat4 far0 = vfloat4::loadu(&r0->tfar);
-   const vfloat4 far1 = vfloat4::loadu(&r1->tfar);
-   const vfloat4 far2 = vfloat4::loadu(&r2->tfar);
-   const vfloat4 far3 = vfloat4::loadu(&r3->tfar);
-
-   vfloat4 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3, ray.tfar, mask_bits, id_bits, flags_bits);
-   ray.mask  = asInt(mask_bits);
-   ray.id    = asInt(id_bits);
-   ray.flags = asInt(flags_bits);
-
-   return ray;
- }
-
-#if defined(__AVX__)
- /* 8-wide gather: one 8-value load per ray starting at org, one 4-value load per ray
-    starting at tfar, each followed by a transpose into the packet layout. */
- template<>
- __forceinline Ray8 RayStreamAOS::getRayByOffset(const vint8& offset)
- {
-   /* resolve the eight byte offsets into ray pointers once */
-   Ray* r[8];
-   for (size_t i = 0; i < 8; i++)
-     r[i] = (Ray*)((char*)ptr + offset[i]);
-
-   Ray8 ray;
-
-   /* org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
-   const vfloat8 od0 = vfloat8::loadu(&r[0]->org);
-   const vfloat8 od1 = vfloat8::loadu(&r[1]->org);
-   const vfloat8 od2 = vfloat8::loadu(&r[2]->org);
-   const vfloat8 od3 = vfloat8::loadu(&r[3]->org);
-   const vfloat8 od4 = vfloat8::loadu(&r[4]->org);
-   const vfloat8 od5 = vfloat8::loadu(&r[5]->org);
-   const vfloat8 od6 = vfloat8::loadu(&r[6]->org);
-   const vfloat8 od7 = vfloat8::loadu(&r[7]->org);
-
-   transpose(od0,od1,od2,od3,od4,od5,od6,od7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
-   /* tfar, mask, id, flags: transposed as floats, the integer fields are reinterpreted afterwards */
-   const vfloat4 far0 = vfloat4::loadu(&r[0]->tfar);
-   const vfloat4 far1 = vfloat4::loadu(&r[1]->tfar);
-   const vfloat4 far2 = vfloat4::loadu(&r[2]->tfar);
-   const vfloat4 far3 = vfloat4::loadu(&r[3]->tfar);
-   const vfloat4 far4 = vfloat4::loadu(&r[4]->tfar);
-   const vfloat4 far5 = vfloat4::loadu(&r[5]->tfar);
-   const vfloat4 far6 = vfloat4::loadu(&r[6]->tfar);
-   const vfloat4 far7 = vfloat4::loadu(&r[7]->tfar);
-
-   vfloat8 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3,far4,far5,far6,far7, ray.tfar, mask_bits, id_bits, flags_bits);
-   ray.mask  = asInt(mask_bits);
-   ray.id    = asInt(id_bits);
-   ray.flags = asInt(flags_bits);
-
-   return ray;
- }
-#endif
-
-#if defined(__AVX512F__)
- /* 16-wide gather: one 8-value load per ray starting at org, one 4-value load per ray
-    starting at tfar, each followed by a transpose into the packet layout. */
- template<>
- __forceinline Ray16 RayStreamAOS::getRayByOffset(const vint16& offset)
- {
-   /* resolve the sixteen byte offsets into ray pointers once */
-   Ray* r[16];
-   for (size_t i = 0; i < 16; i++)
-     r[i] = (Ray*)((char*)ptr + offset[i]);
-
-   Ray16 ray;
-
-   /* org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
-   const vfloat8 od0  = vfloat8::loadu(&r[ 0]->org);
-   const vfloat8 od1  = vfloat8::loadu(&r[ 1]->org);
-   const vfloat8 od2  = vfloat8::loadu(&r[ 2]->org);
-   const vfloat8 od3  = vfloat8::loadu(&r[ 3]->org);
-   const vfloat8 od4  = vfloat8::loadu(&r[ 4]->org);
-   const vfloat8 od5  = vfloat8::loadu(&r[ 5]->org);
-   const vfloat8 od6  = vfloat8::loadu(&r[ 6]->org);
-   const vfloat8 od7  = vfloat8::loadu(&r[ 7]->org);
-   const vfloat8 od8  = vfloat8::loadu(&r[ 8]->org);
-   const vfloat8 od9  = vfloat8::loadu(&r[ 9]->org);
-   const vfloat8 od10 = vfloat8::loadu(&r[10]->org);
-   const vfloat8 od11 = vfloat8::loadu(&r[11]->org);
-   const vfloat8 od12 = vfloat8::loadu(&r[12]->org);
-   const vfloat8 od13 = vfloat8::loadu(&r[13]->org);
-   const vfloat8 od14 = vfloat8::loadu(&r[14]->org);
-   const vfloat8 od15 = vfloat8::loadu(&r[15]->org);
-
-   transpose(od0,od1,od2,od3,od4,od5,od6,od7,od8,od9,od10,od11,od12,od13,od14,od15,
-             ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
-   /* tfar, mask, id, flags: transposed as floats, the integer fields are reinterpreted afterwards */
-   const vfloat4 far0  = vfloat4::loadu(&r[ 0]->tfar);
-   const vfloat4 far1  = vfloat4::loadu(&r[ 1]->tfar);
-   const vfloat4 far2  = vfloat4::loadu(&r[ 2]->tfar);
-   const vfloat4 far3  = vfloat4::loadu(&r[ 3]->tfar);
-   const vfloat4 far4  = vfloat4::loadu(&r[ 4]->tfar);
-   const vfloat4 far5  = vfloat4::loadu(&r[ 5]->tfar);
-   const vfloat4 far6  = vfloat4::loadu(&r[ 6]->tfar);
-   const vfloat4 far7  = vfloat4::loadu(&r[ 7]->tfar);
-   const vfloat4 far8  = vfloat4::loadu(&r[ 8]->tfar);
-   const vfloat4 far9  = vfloat4::loadu(&r[ 9]->tfar);
-   const vfloat4 far10 = vfloat4::loadu(&r[10]->tfar);
-   const vfloat4 far11 = vfloat4::loadu(&r[11]->tfar);
-   const vfloat4 far12 = vfloat4::loadu(&r[12]->tfar);
-   const vfloat4 far13 = vfloat4::loadu(&r[13]->tfar);
-   const vfloat4 far14 = vfloat4::loadu(&r[14]->tfar);
-   const vfloat4 far15 = vfloat4::loadu(&r[15]->tfar);
-
-   vfloat16 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3,far4,far5,far6,far7,far8,far9,far10,far11,far12,far13,far14,far15,
-             ray.tfar, mask_bits, id_bits, flags_bits);
-   ray.mask  = asInt(mask_bits);
-   ray.id    = asInt(id_bits);
-   ray.flags = asInt(flags_bits);
-
-   return ray;
- }
-#endif
-
-
- /* Ray stream stored as an array of pointers: `ptr[i]` is the address of ray i. */
- struct RayStreamAOP
- {
-   __forceinline RayStreamAOP(void* rays)
-     : ptr((Ray**)rays) {}
-
-   /* Returns a reference to the ray that entry `index` of the pointer array refers to. */
-   __forceinline Ray& getRayByIndex(size_t index)
-   {
-     return *ptr[index];
-   }
-
-   /* Unmasked gather of K rays by per-lane index; only declared here. */
-   template<int K>
-   __forceinline RayK<K> getRayByIndex(const vint<K>& index);
-
-   /* Masked gather: disabled lanes are redirected to index 0 before the unmasked
-      overload is called. */
-   template<int K>
-   __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
-   {
-     /* vint<K>(zero) instead of vintx(zero): the fallback operand must have the same
-        width as `index` for every K, not only for the native vector width. */
-     const vint<K> valid_index = select(valid, index, vint<K>(zero));
-     return getRayByIndex(valid_index);
-   }
-
-   /* Writes the hit fields back to the rays selected by `index`, for the lanes that are
-      enabled in valid_i and whose geomID is valid. The stored rays are accessed as RayHit. */
-   template<int K>
-   __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
-
-     if (likely(any(valid)))
-     {
-       /* visit the set bits of the lane mask and store one ray at a time */
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
-
-         ray_k->tfar   = ray.tfar[k];
-         ray_k->Ng.x   = ray.Ng.x[k];
-         ray_k->Ng.y   = ray.Ng.y[k];
-         ray_k->Ng.z   = ray.Ng.z[k];
-         ray_k->u      = ray.u[k];
-         ray_k->v      = ray.v[k];
-         ray_k->primID = ray.primID[k];
-         ray_k->geomID = ray.geomID[k];
-         /* presumably copies lane k of the instance ID stack - helper is defined elsewhere */
-         instance_id_stack::copy(ray.instID, ray_k->instID, k);
-       }
-     }
-   }
-
-   /* RayK overload: writes only tfar, for the lanes that are enabled in valid_i and
-      whose tfar is negative. */
-   template<int K>
-   __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.tfar < 0.0f);
-
-     if (likely(any(valid)))
-     {
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         Ray* __restrict__ ray_k = ptr[index[k]];
-
-         ray_k->tfar = ray.tfar[k];
-       }
-     }
-   }
-
-   Ray** __restrict__ ptr; // array of ray pointers
- };
-
- /* 4-wide gather: for each of the four selected rays, loads three groups of four values
-    (starting at org, dir and tfar) and transposes them into the packet layout. */
- template<>
- __forceinline Ray4 RayStreamAOP::getRayByIndex(const vint4& index)
- {
-   /* look up the four ray pointers once */
-   Ray* const r0 = ptr[index[0]];
-   Ray* const r1 = ptr[index[1]];
-   Ray* const r2 = ptr[index[2]];
-   Ray* const r3 = ptr[index[3]];
-
-   Ray4 ray;
-
-   /* org.x, org.y, org.z, tnear */
-   const vfloat4 org0 = vfloat4::loadu(&r0->org);
-   const vfloat4 org1 = vfloat4::loadu(&r1->org);
-   const vfloat4 org2 = vfloat4::loadu(&r2->org);
-   const vfloat4 org3 = vfloat4::loadu(&r3->org);
-   transpose(org0,org1,org2,org3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
-
-   /* dir.x, dir.y, dir.z, time */
-   const vfloat4 dir0 = vfloat4::loadu(&r0->dir);
-   const vfloat4 dir1 = vfloat4::loadu(&r1->dir);
-   const vfloat4 dir2 = vfloat4::loadu(&r2->dir);
-   const vfloat4 dir3 = vfloat4::loadu(&r3->dir);
-   transpose(dir0,dir1,dir2,dir3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
-   /* tfar, mask, id, flags: transposed as floats, the integer fields are reinterpreted afterwards */
-   const vfloat4 far0 = vfloat4::loadu(&r0->tfar);
-   const vfloat4 far1 = vfloat4::loadu(&r1->tfar);
-   const vfloat4 far2 = vfloat4::loadu(&r2->tfar);
-   const vfloat4 far3 = vfloat4::loadu(&r3->tfar);
-
-   vfloat4 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3, ray.tfar, mask_bits, id_bits, flags_bits);
-   ray.mask  = asInt(mask_bits);
-   ray.id    = asInt(id_bits);
-   ray.flags = asInt(flags_bits);
-
-   return ray;
- }
-
-#if defined(__AVX__)
- template<>
- __forceinline Ray8 RayStreamAOP::getRayByIndex(const vint8& index)
- {
- Ray8 ray;
-
- /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
- const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
- const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
- const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
- const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
- const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
- const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
- const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
- const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
-
- transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
- /* load and transpose: tfar, mask, id, flags */
- const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
- const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
- const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
- const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
- const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
- const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
- const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
- const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
-
- vfloat8 maskf, idf, flagsf;
- transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
- ray.mask = asInt(maskf);
- ray.id = asInt(idf);
- ray.flags = asInt(flagsf);
-
- return ray;
- }
-#endif
-
-#if defined(__AVX512F__)
- template<>
- __forceinline Ray16 RayStreamAOP::getRayByIndex(const vint16& index)
- {
- Ray16 ray;
-
- /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
- const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
- const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
- const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
- const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
- const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
- const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
- const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
- const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
- const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org);
- const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org);
- const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org);
- const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org);
- const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org);
- const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org);
- const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org);
- const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org);
-
- transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
- ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
- /* load and transpose: tfar, mask, id, flags */
- const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
- const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
- const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
- const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
- const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
- const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
- const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
- const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
- const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar);
- const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar);
- const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar);
- const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar);
- const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar);
- const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar);
- const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar);
- const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar);
-
- vfloat16 maskf, idf, flagsf;
- transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
- ray.tfar, maskf, idf, flagsf);
-
- ray.mask = asInt(maskf);
- ray.id = asInt(idf);
- ray.flags = asInt(flagsf);
-
- return ray;
- }
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore.cpp
deleted file mode 100644
index 625fbf6d4f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp
+++ /dev/null
@@ -1,1799 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#define RTC_EXPORT_API
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "context.h"
-#include "../../include/embree3/rtcore_ray.h"
-
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include <mutex>
-#endif
-
-using namespace embree;
-
-RTC_NAMESPACE_BEGIN;
-
- /* mutex to make API thread safe */
-#if defined(__aarch64__) && defined(BUILD_IOS)
- static std::mutex g_mutex;
-#else
- static MutexSys g_mutex;
-#endif
-
- RTC_API RTCDevice rtcNewDevice(const char* config)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewDevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- Device* device = new Device(config);
- return (RTCDevice) device->refInc();
- RTC_CATCH_END(nullptr);
- return (RTCDevice) nullptr;
- }
-
- RTC_API void rtcRetainDevice(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainDevice);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->refInc();
- RTC_CATCH_END(nullptr);
- }
-
- RTC_API void rtcReleaseDevice(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseDevice);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->refDec();
- RTC_CATCH_END(nullptr);
- }
-
- RTC_API ssize_t rtcGetDeviceProperty(RTCDevice hdevice, RTCDeviceProperty prop)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetDeviceProperty);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- return device->getProperty(prop);
- RTC_CATCH_END(device);
- return 0;
- }
-
- RTC_API void rtcSetDeviceProperty(RTCDevice hdevice, const RTCDeviceProperty prop, ssize_t val)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceProperty);
- const bool internal_prop = (size_t)prop >= 1000000 && (size_t)prop < 1000004;
- if (!internal_prop) RTC_VERIFY_HANDLE(hdevice); // allow NULL device for special internal settings
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->setProperty(prop,val);
- RTC_CATCH_END(device);
- }
-
- RTC_API RTCError rtcGetDeviceError(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetDeviceError);
- if (device == nullptr) return Device::getThreadErrorCode();
- else return device->getDeviceErrorCode();
- RTC_CATCH_END(device);
- return RTC_ERROR_UNKNOWN;
- }
-
- RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceErrorFunction);
- RTC_VERIFY_HANDLE(hdevice);
- device->setErrorFunction(error, userPtr);
- RTC_CATCH_END(device);
- }
-
- RTC_API void rtcSetDeviceMemoryMonitorFunction(RTCDevice hdevice, RTCMemoryMonitorFunction memoryMonitor, void* userPtr)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceMemoryMonitorFunction);
- device->setMemoryMonitorFunction(memoryMonitor, userPtr);
- RTC_CATCH_END(device);
- }
-
- RTC_API RTCBuffer rtcNewBuffer(RTCDevice hdevice, size_t byteSize)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewBuffer);
- RTC_VERIFY_HANDLE(hdevice);
- Buffer* buffer = new Buffer((Device*)hdevice, byteSize);
- return (RTCBuffer)buffer->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice hdevice, void* ptr, size_t byteSize)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewSharedBuffer);
- RTC_VERIFY_HANDLE(hdevice);
- Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr);
- return (RTCBuffer)buffer->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API void* rtcGetBufferData(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetBufferData);
- RTC_VERIFY_HANDLE(hbuffer);
- return buffer->data();
- RTC_CATCH_END2(buffer);
- return nullptr;
- }
-
- RTC_API void rtcRetainBuffer(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainBuffer);
- RTC_VERIFY_HANDLE(hbuffer);
- buffer->refInc();
- RTC_CATCH_END2(buffer);
- }
-
- RTC_API void rtcReleaseBuffer(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseBuffer);
- RTC_VERIFY_HANDLE(hbuffer);
- buffer->refDec();
- RTC_CATCH_END2(buffer);
- }
-
- RTC_API RTCScene rtcNewScene (RTCDevice hdevice)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewScene);
- RTC_VERIFY_HANDLE(hdevice);
- Scene* scene = new Scene((Device*)hdevice);
- return (RTCScene) scene->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneDevice);
- RTC_VERIFY_HANDLE(hscene);
- return (RTCDevice)scene->device->refInc(); // user will own one additional device reference
- RTC_CATCH_END2(scene);
- return (RTCDevice)nullptr;
- }
-
- RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneProgressMonitorFunction);
- RTC_VERIFY_HANDLE(hscene);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- scene->setProgressMonitorFunction(progress,ptr);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetSceneBuildQuality (RTCScene hscene, RTCBuildQuality quality)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneBuildQuality);
- RTC_VERIFY_HANDLE(hscene);
- if (quality != RTC_BUILD_QUALITY_LOW &&
- quality != RTC_BUILD_QUALITY_MEDIUM &&
- quality != RTC_BUILD_QUALITY_HIGH)
- // -- GODOT start --
- // throw std::runtime_error("invalid build quality");
- abort();
- // -- GODOT end --
- scene->setBuildQuality(quality);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetSceneFlags (RTCScene hscene, RTCSceneFlags flags)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneFlags);
- RTC_VERIFY_HANDLE(hscene);
- scene->setSceneFlags(flags);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API RTCSceneFlags rtcGetSceneFlags(RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneFlags);
- RTC_VERIFY_HANDLE(hscene);
- return scene->getSceneFlags();
- RTC_CATCH_END2(scene);
- return RTC_SCENE_FLAG_NONE;
- }
-
- RTC_API void rtcCommitScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcCommitScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->commit(false);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcJoinCommitScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcJoinCommitScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->commit(true);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcGetSceneBounds(RTCScene hscene, RTCBounds* bounds_o)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneBounds);
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- BBox3fa bounds = scene->bounds.bounds();
- bounds_o->lower_x = bounds.lower.x;
- bounds_o->lower_y = bounds.lower.y;
- bounds_o->lower_z = bounds.lower.z;
- bounds_o->align0 = 0;
- bounds_o->upper_x = bounds.upper.x;
- bounds_o->upper_y = bounds.upper.y;
- bounds_o->upper_z = bounds.upper.z;
- bounds_o->align1 = 0;
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcGetSceneLinearBounds(RTCScene hscene, RTCLinearBounds* bounds_o)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneBounds);
- RTC_VERIFY_HANDLE(hscene);
- if (bounds_o == nullptr)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid destination pointer");
- if (scene->isModified())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
-
- bounds_o->bounds0.lower_x = scene->bounds.bounds0.lower.x;
- bounds_o->bounds0.lower_y = scene->bounds.bounds0.lower.y;
- bounds_o->bounds0.lower_z = scene->bounds.bounds0.lower.z;
- bounds_o->bounds0.align0 = 0;
- bounds_o->bounds0.upper_x = scene->bounds.bounds0.upper.x;
- bounds_o->bounds0.upper_y = scene->bounds.bounds0.upper.y;
- bounds_o->bounds0.upper_z = scene->bounds.bounds0.upper.z;
- bounds_o->bounds0.align1 = 0;
- bounds_o->bounds1.lower_x = scene->bounds.bounds1.lower.x;
- bounds_o->bounds1.lower_y = scene->bounds.bounds1.lower.y;
- bounds_o->bounds1.lower_z = scene->bounds.bounds1.lower.z;
- bounds_o->bounds1.align0 = 0;
- bounds_o->bounds1.upper_x = scene->bounds.bounds1.upper.x;
- bounds_o->bounds1.upper_y = scene->bounds.bounds1.upper.y;
- bounds_o->bounds1.upper_z = scene->bounds.bounds1.upper.z;
- bounds_o->bounds1.align1 = 0;
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcCollide (RTCScene hscene0, RTCScene hscene1, RTCCollideFunc callback, void* userPtr)
- {
- Scene* scene0 = (Scene*) hscene0;
- Scene* scene1 = (Scene*) hscene1;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcCollide);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene0);
- RTC_VERIFY_HANDLE(hscene1);
- if (scene0->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (scene1->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (scene0->device != scene1->device) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes are from different devices");
- auto nUserPrims0 = scene0->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false);
- auto nUserPrims1 = scene1->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false);
- if (scene0->numPrimitives() != nUserPrims0 && scene1->numPrimitives() != nUserPrims1) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes must only contain user geometries with a single timestep");
-#endif
- scene0->intersectors.collide(scene0,scene1,callback,userPtr);
- RTC_CATCH_END(scene0->device);
- }
-
- inline bool pointQuery(Scene* scene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
- {
- bool changed = false;
- if (userContext->instStackSize > 0)
- {
- const AffineSpace3fa transform = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
-
- float similarityScale = 0.f;
- const bool similtude = similarityTransform(transform, &similarityScale);
- assert((similtude && similarityScale > 0) || (!similtude && similarityScale == 0.f));
-
- PointQuery query_inst;
- query_inst.p = xfmPoint(transform, Vec3fa(query->x, query->y, query->z));
- query_inst.radius = query->radius * similarityScale;
- query_inst.time = query->time;
-
- PointQueryContext context_inst(scene, (PointQuery*)query,
- similtude ? POINT_QUERY_TYPE_SPHERE : POINT_QUERY_TYPE_AABB,
- queryFunc, userContext, similarityScale, userPtr);
- changed = scene->intersectors.pointQuery((PointQuery*)&query_inst, &context_inst);
- }
- else
- {
- PointQueryContext context(scene, (PointQuery*)query,
- POINT_QUERY_TYPE_SPHERE, queryFunc, userContext, 1.f, userPtr);
- changed = scene->intersectors.pointQuery((PointQuery*)query, &context);
- }
- return changed;
- }
-
- RTC_API bool rtcPointQuery(RTCScene hscene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(userContext);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
- if (((size_t)userContext) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "context not aligned to 16 bytes");
-#endif
-
- return pointQuery(scene, query, userContext, queryFunc, userPtr);
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery4 (const int* valid, RTCScene hscene, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery4);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery4* query4 = (PointQuery4*)query;
- PointQuery query1;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue;
- query4->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query4->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery8 (const int* valid, RTCScene hscene, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery8);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery8* query8 = (PointQuery8*)query;
- PointQuery query1;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue;
- query8->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query8->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery16 (const int* valid, RTCScene hscene, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery16);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery16* query16 = (PointQuery16*)query;
- PointQuery query1;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue;
- PointQuery query1; query16->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query16->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API void rtcIntersect1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- STAT3(normal.travs,1,1,1);
- IntersectContext context(scene,user_context);
- scene->intersectors.intersect(*rayhit,&context);
-#if defined(DEBUG)
- ((RayHit*)rayhit)->verifyHit();
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit4* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect4);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit4* rayhit4 = (RayHit4*)rayhit;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit4->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit4->set(i,ray1);
- }
-#else
- scene->intersectors.intersect4(valid,*rayhit,&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit8* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect8);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes");
- if (((size_t)rayhit) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 32 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit8* rayhit8 = (RayHit8*) rayhit;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit8->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit8->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector8))
- scene->intersectors.intersect8(valid,*rayhit,&context);
- else
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,8,1,sizeof(RTCRayHit8),&context);
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit16* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect16);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes");
- if (((size_t)rayhit) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 64 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit16* rayhit16 = (RayHit16*) rayhit;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit16->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit16->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector16))
- scene->intersectors.intersect16(valid,*rayhit,&context);
- else
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,16,1,sizeof(RTCRayHit16),&context);
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect1M (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1M);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit ) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for single rays */
- if (likely(M == 1)) {
- if (likely(rayhit->ray.tnear <= rayhit->ray.tfar))
- scene->intersectors.intersect(*rayhit,&context);
- }
-
- /* codepath for streams */
- else {
- scene->device->rayStreamFilters.intersectAOS(scene,rayhit,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1M not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect1Mp (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit** rn, unsigned int M)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1Mp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rn) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for single rays */
- if (likely(M == 1)) {
- if (likely(rn[0]->ray.tnear <= rn[0]->ray.tfar))
- scene->intersectors.intersect(*rn[0],&context);
- }
-
- /* codepath for streams */
- else {
- scene->device->rayStreamFilters.intersectAOP(scene,rn,M,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1Mp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersectNM (RTCScene hscene, RTCIntersectContext* user_context, struct RTCRayHitN* rayhit, unsigned int N, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersectNM);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,N*M,N*M,N*M);
- IntersectContext context(scene,user_context);
-
- /* code path for single ray streams */
- if (likely(N == 1))
- {
- /* fast code path for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(((RTCRayHit*)rayhit)->ray.tnear <= ((RTCRayHit*)rayhit)->ray.tfar))
- scene->intersectors.intersect(*(RTCRayHit*)rayhit,&context);
- }
- /* normal codepath for single ray streams */
- else {
- scene->device->rayStreamFilters.intersectAOS(scene,(RTCRayHit*)rayhit,M,byteStride,&context);
- }
- }
- /* code path for ray packet streams */
- else {
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,N,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNM not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersectNp (RTCScene hscene, RTCIntersectContext* user_context, const RTCRayHitNp* rayhit, unsigned int N)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersectNp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit->ray.org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_y not aligned to 4 bytes");
- if (((size_t)rayhit->ray.org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_z not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_y not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_z not aligned to 4 bytes");
- if (((size_t)rayhit->ray.tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.tnear not aligned to 4 bytes");
- if (((size_t)rayhit->ray.time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.time not aligned to 4 bytes");
- if (((size_t)rayhit->ray.mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.mask not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_x not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_y not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_z not aligned to 4 bytes");
- if (((size_t)rayhit->hit.u ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.u not aligned to 4 bytes");
- if (((size_t)rayhit->hit.v ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.v not aligned to 4 bytes");
- if (((size_t)rayhit->hit.geomID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.geomID not aligned to 4 bytes");
- if (((size_t)rayhit->hit.primID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.primID not aligned to 4 bytes");
- if (((size_t)rayhit->hit.instID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.instID not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,N,N,N);
- IntersectContext context(scene,user_context);
- scene->device->rayStreamFilters.intersectSOP(scene,rayhit,N,&context);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1);
- STAT3(shadow.travs,1,1,1);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- IntersectContext context(scene,user_context);
- scene->intersectors.occluded(*ray,&context);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay4* ray)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded4);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- Ray4* ray4 = (Ray4*) ray;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue;
- Ray ray1; ray4->get(i,ray1);
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray4->set(i,ray1);
- }
-#else
- scene->intersectors.occluded4(valid,*ray,&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay8* ray)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded8);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes");
- if (((size_t)ray) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 32 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- Ray8* ray8 = (Ray8*) ray;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue;
- Ray ray1; ray8->get(i,ray1);
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray8->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector8))
- scene->intersectors.occluded8(valid,*ray,&context);
- else
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,8,1,sizeof(RTCRay8),&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay16* ray)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded16);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes");
- if (((size_t)ray) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 64 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- Ray16* ray16 = (Ray16*) ray;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue;
- Ray ray1; ray16->get(i,ray1);
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray16->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector16))
- scene->intersectors.occluded16(valid,*ray,&context);
- else
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,16,1,sizeof(RTCRay16),&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1M(RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1M);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,M,M,M);
- IntersectContext context(scene,user_context);
- /* fast codepath for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(ray->tnear <= ray->tfar))
- scene->intersectors.occluded (*ray,&context);
- }
- /* codepath for normal streams */
- else {
- scene->device->rayStreamFilters.occludedAOS(scene,ray,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1M not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1Mp(RTCScene hscene, RTCIntersectContext* user_context, RTCRay** ray, unsigned int M)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1Mp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(ray[0]->tnear <= ray[0]->tfar))
- scene->intersectors.occluded (*ray[0],&context);
- }
- /* codepath for normal streams */
- else {
- scene->device->rayStreamFilters.occludedAOP(scene,ray,M,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1Mp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccludedNM(RTCScene hscene, RTCIntersectContext* user_context, RTCRayN* ray, unsigned int N, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccludedNM);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (byteStride < sizeof(RTCRayHit)) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"byteStride too small");
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,N*M,N*N,N*N);
- IntersectContext context(scene,user_context);
-
- /* codepath for single rays */
- if (likely(N == 1))
- {
- /* fast path for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(((RTCRay*)ray)->tnear <= ((RTCRay*)ray)->tfar))
- scene->intersectors.occluded (*(RTCRay*)ray,&context);
- }
- /* codepath for normal ray streams */
- else {
- scene->device->rayStreamFilters.occludedAOS(scene,(RTCRay*)ray,M,byteStride,&context);
- }
- }
- /* code path for ray packet streams */
- else {
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,N,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNM not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccludedNp(RTCScene hscene, RTCIntersectContext* user_context, const RTCRayNp* ray, unsigned int N)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccludedNp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray->org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_x not aligned to 4 bytes");
- if (((size_t)ray->org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_y not aligned to 4 bytes");
- if (((size_t)ray->org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_z not aligned to 4 bytes");
- if (((size_t)ray->dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes");
- if (((size_t)ray->dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_y not aligned to 4 bytes");
- if (((size_t)ray->dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_z not aligned to 4 bytes");
- if (((size_t)ray->tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes");
- if (((size_t)ray->tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "tnear not aligned to 4 bytes");
- if (((size_t)ray->time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "time not aligned to 4 bytes");
- if (((size_t)ray->mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,N,N,N);
- IntersectContext context(scene,user_context);
- scene->device->rayStreamFilters.occludedSOP(scene,ray,N,&context);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcRetainScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->refInc();
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcReleaseScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->refDec();
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetGeometryInstancedScene(RTCGeometry hgeometry, RTCScene hscene)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- Ref<Scene> scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryInstancedScene);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(hscene);
- geometry->setInstancedScene(scene);
- RTC_CATCH_END2(geometry);
- }
-
- AffineSpace3fa loadTransform(RTCFormat format, const float* xfm)
- {
- AffineSpace3fa space = one;
- switch (format)
- {
- case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
- space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 4], xfm[ 8]),
- Vec3fa(xfm[ 1], xfm[ 5], xfm[ 9]),
- Vec3fa(xfm[ 2], xfm[ 6], xfm[10]),
- Vec3fa(xfm[ 3], xfm[ 7], xfm[11]));
- break;
-
- case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
- space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]),
- Vec3fa(xfm[ 3], xfm[ 4], xfm[ 5]),
- Vec3fa(xfm[ 6], xfm[ 7], xfm[ 8]),
- Vec3fa(xfm[ 9], xfm[10], xfm[11]));
- break;
-
- case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
- space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]),
- Vec3fa(xfm[ 4], xfm[ 5], xfm[ 6]),
- Vec3fa(xfm[ 8], xfm[ 9], xfm[10]),
- Vec3fa(xfm[12], xfm[13], xfm[14]));
- break;
-
- default:
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
- break;
- }
- return space;
- }
-
- void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm)
- {
- switch (format)
- {
- case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
- xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vy.x; xfm[ 2] = space.l.vz.x; xfm[ 3] = space.p.x;
- xfm[ 4] = space.l.vx.y; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vz.y; xfm[ 7] = space.p.y;
- xfm[ 8] = space.l.vx.z; xfm[ 9] = space.l.vy.z; xfm[10] = space.l.vz.z; xfm[11] = space.p.z;
- break;
-
- case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
- xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z;
- xfm[ 3] = space.l.vy.x; xfm[ 4] = space.l.vy.y; xfm[ 5] = space.l.vy.z;
- xfm[ 6] = space.l.vz.x; xfm[ 7] = space.l.vz.y; xfm[ 8] = space.l.vz.z;
- xfm[ 9] = space.p.x; xfm[10] = space.p.y; xfm[11] = space.p.z;
- break;
-
- case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
- xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z; xfm[ 3] = 0.f;
- xfm[ 4] = space.l.vy.x; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vy.z; xfm[ 7] = 0.f;
- xfm[ 8] = space.l.vz.x; xfm[ 9] = space.l.vz.y; xfm[10] = space.l.vz.z; xfm[11] = 0.f;
- xfm[12] = space.p.x; xfm[13] = space.p.y; xfm[14] = space.p.z; xfm[15] = 1.f;
- break;
-
- default:
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
- break;
- }
- }
-
- RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeStep, RTCFormat format, const void* xfm)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTransform);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(xfm);
- const AffineSpace3fa transform = loadTransform(format, (const float*)xfm);
- geometry->setTransform(transform, timeStep);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTransformQuaternion(RTCGeometry hgeometry, unsigned int timeStep, const RTCQuaternionDecomposition* qd)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTransformQuaternion);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(qd);
-
- AffineSpace3fx transform;
- transform.l.vx.x = qd->scale_x;
- transform.l.vy.y = qd->scale_y;
- transform.l.vz.z = qd->scale_z;
- transform.l.vy.x = qd->skew_xy;
- transform.l.vz.x = qd->skew_xz;
- transform.l.vz.y = qd->skew_yz;
- transform.l.vx.y = qd->translation_x;
- transform.l.vx.z = qd->translation_y;
- transform.l.vy.z = qd->translation_z;
- transform.p.x = qd->shift_x;
- transform.p.y = qd->shift_y;
- transform.p.z = qd->shift_z;
-
- // normalize quaternion
- Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k);
- q = normalize(q);
- transform.l.vx.w = q.i;
- transform.l.vy.w = q.j;
- transform.l.vz.w = q.k;
- transform.p.w = q.r;
-
- geometry->setQuaternionDecomposition(transform, timeStep);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcGetGeometryTransform(RTCGeometry hgeometry, float time, RTCFormat format, void* xfm)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryTransform);
- const AffineSpace3fa transform = geometry->getTransform(time);
- storeTransform(transform, format, (float*)xfm);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
- {
- IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
- args->report(args,filter_args);
- }
-
- RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
- {
- OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i;
- args->report(args,filter_args);
- }
-
- RTC_API RTCGeometry rtcNewGeometry (RTCDevice hdevice, RTCGeometryType type)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewGeometry);
- RTC_VERIFY_HANDLE(hdevice);
-
- switch (type)
- {
- case RTC_GEOMETRY_TYPE_TRIANGLE:
- {
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- createTriangleMeshTy createTriangleMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createTriangleMesh);
- Geometry* geom = createTriangleMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_TRIANGLE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_QUAD:
- {
-#if defined(EMBREE_GEOMETRY_QUAD)
- createQuadMeshTy createQuadMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createQuadMesh);
- Geometry* geom = createQuadMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_QUAD is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_SPHERE_POINT:
- case RTC_GEOMETRY_TYPE_DISC_POINT:
- case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT:
- {
-#if defined(EMBREE_GEOMETRY_POINT)
- createPointsTy createPoints = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_builder_cpu_features, createPoints);
-
- Geometry *geom;
- switch(type) {
- case RTC_GEOMETRY_TYPE_SPHERE_POINT:
- geom = createPoints(device, Geometry::GTY_SPHERE_POINT);
- break;
- case RTC_GEOMETRY_TYPE_DISC_POINT:
- geom = createPoints(device, Geometry::GTY_DISC_POINT);
- break;
- case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT:
- geom = createPoints(device, Geometry::GTY_ORIENTED_DISC_POINT);
- break;
- default:
- geom = nullptr;
- break;
- }
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_POINT is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE:
- case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE:
- {
-#if defined(EMBREE_GEOMETRY_CURVE)
- createLineSegmentsTy createLineSegments = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createLineSegments);
- createCurvesTy createCurves = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createCurves);
-
- Geometry* geom;
- switch (type) {
- case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_CONE_LINEAR_CURVE); break;
- case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ROUND_LINEAR_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_FLAT_LINEAR_CURVE); break;
- //case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ORIENTED_LINEAR_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BEZIER_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BEZIER_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BEZIER_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BSPLINE_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BSPLINE_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BSPLINE_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_HERMITE_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_HERMITE_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_HERMITE_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_CATMULL_ROM_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_CATMULL_ROM_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_CATMULL_ROM_CURVE); break;
- default: geom = nullptr; break;
- }
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_CURVE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_SUBDIVISION:
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- createSubdivMeshTy createSubdivMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX(device->enabled_cpu_features,createSubdivMesh);
- //SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createSubdivMesh); // FIXME: this does not work for some reason?
- Geometry* geom = createSubdivMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_SUBDIVISION is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_USER:
- {
-#if defined(EMBREE_GEOMETRY_USER)
- createUserGeometryTy createUserGeometry = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createUserGeometry);
- Geometry* geom = createUserGeometry(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_USER is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_INSTANCE:
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- createInstanceTy createInstance = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createInstance);
- Geometry* geom = createInstance(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_INSTANCE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_GRID:
- {
-#if defined(EMBREE_GEOMETRY_GRID)
- createGridMeshTy createGridMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createGridMesh);
- Geometry* geom = createGridMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_GRID is not supported");
-#endif
- }
-
- default:
- throw_RTCError(RTC_ERROR_UNKNOWN,"invalid geometry type");
- }
-
- RTC_CATCH_END(device);
- return nullptr;
- }
-
- RTC_API void rtcSetGeometryUserPrimitiveCount(RTCGeometry hgeometry, unsigned int userPrimitiveCount)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryUserPrimitiveCount);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (unlikely(geometry->getType() != Geometry::GTY_USER_GEOMETRY))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation only allowed for user geometries");
-
- geometry->setNumPrimitives(userPrimitiveCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTimeStepCount(RTCGeometry hgeometry, unsigned int timeStepCount)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTimeStepCount);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (timeStepCount > RTC_MAX_TIME_STEP_COUNT)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"number of time steps is out of range");
-
- geometry->setNumTimeSteps(timeStepCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTimeRange(RTCGeometry hgeometry, float startTime, float endTime)
- {
- Ref<Geometry> geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTimeRange);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (startTime > endTime)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"startTime has to be smaller or equal to the endTime");
-
- geometry->setTimeRange(BBox1f(startTime,endTime));
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryVertexAttributeCount(RTCGeometry hgeometry, unsigned int N)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryVertexAttributeCount);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setVertexAttributeCount(N);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTopologyCount(RTCGeometry hgeometry, unsigned int N)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTopologyCount);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setTopologyCount(N);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryBuildQuality (RTCGeometry hgeometry, RTCBuildQuality quality)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBuildQuality);
- RTC_VERIFY_HANDLE(hgeometry);
- if (quality != RTC_BUILD_QUALITY_LOW &&
- quality != RTC_BUILD_QUALITY_MEDIUM &&
- quality != RTC_BUILD_QUALITY_HIGH &&
- quality != RTC_BUILD_QUALITY_REFIT)
- // -- GODOT start --
- // throw std::runtime_error("invalid build quality");
- abort();
- // -- GODOT end --
- geometry->setBuildQuality(quality);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryMaxRadiusScale(RTCGeometry hgeometry, float maxRadiusScale)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryMaxRadiusScale);
- RTC_VERIFY_HANDLE(hgeometry);
-#if RTC_MIN_WIDTH
- if (maxRadiusScale < 1.0f) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximal radius scale has to be larger or equal to 1");
- geometry->setMaxRadiusScale(maxRadiusScale);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"min-width feature is not enabled");
-#endif
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryMask (RTCGeometry hgeometry, unsigned int mask)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryMask);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setMask(mask);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometrySubdivisionMode (RTCGeometry hgeometry, unsigned topologyID, RTCSubdivisionMode mode)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometrySubdivisionMode);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setSubdivisionMode(topologyID,mode);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryVertexAttributeTopology(RTCGeometry hgeometry, unsigned int vertexAttributeID, unsigned int topologyID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryVertexAttributeTopology);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setVertexAttributeTopology(vertexAttributeID, topologyID);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, RTCBuffer hbuffer, size_t byteOffset, size_t byteStride, size_t itemCount)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- Ref<Buffer> buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(hbuffer);
-
- if (geometry->device != buffer->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
-
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
-
- geometry->setBuffer(type, slot, format, buffer, byteOffset, byteStride, (unsigned int)itemCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSharedGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
-
- Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset);
- geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetNewGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
-
- /* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */
- size_t bytes = itemCount*byteStride;
- if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- bytes += (16 - (byteStride%16))%16;
-
- Ref<Buffer> buffer = new Buffer(geometry->device, bytes);
- geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
- return buffer->data();
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryBufferData);
- RTC_VERIFY_HANDLE(hgeometry);
- return geometry->getBuffer(type, slot);
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void rtcEnableGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcEnableGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->enable();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcUpdateGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->updateBuffer(type, slot);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcDisableGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcDisableGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->disable();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTessellationRate (RTCGeometry hgeometry, float tessellationRate)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTessellationRate);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setTessellationRate(tessellationRate);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryUserData (RTCGeometry hgeometry, void* ptr)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryUserData);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setUserData(ptr);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void* rtcGetGeometryUserData (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry; // no ref counting here!
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryUserData);
- RTC_VERIFY_HANDLE(hgeometry);
- return geometry->getUserData();
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBoundsFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setBoundsFunction(bounds,userPtr);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryDisplacementFunction (RTCGeometry hgeometry, RTCDisplacementFunctionN displacement)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryDisplacementFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setDisplacementFunction(displacement);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryIntersectFunction (RTCGeometry hgeometry, RTCIntersectFunctionN intersect)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryIntersectFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setIntersectFunctionN(intersect);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryPointQueryFunction(RTCGeometry hgeometry, RTCPointQueryFunction pointQuery)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryPointQueryFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setPointQueryFunction(pointQuery);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API unsigned int rtcGetGeometryFirstHalfEdge(RTCGeometry hgeometry, unsigned int faceID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryFirstHalfEdge);
- return geometry->getFirstHalfEdge(faceID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryFace(RTCGeometry hgeometry, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryFace);
- return geometry->getFace(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryNextHalfEdge(RTCGeometry hgeometry, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryNextHalfEdge);
- return geometry->getNextHalfEdge(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryPreviousHalfEdge(RTCGeometry hgeometry, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryPreviousHalfEdge);
- return geometry->getPreviousHalfEdge(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryOppositeHalfEdge(RTCGeometry hgeometry, unsigned int topologyID, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryOppositeHalfEdge);
- return geometry->getOppositeHalfEdge(topologyID,edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API void rtcSetGeometryOccludedFunction (RTCGeometry hgeometry, RTCOccludedFunctionN occluded)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetOccludedFunctionN);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setOccludedFunctionN(occluded);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryIntersectFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryIntersectFilterFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setIntersectionFilterFunctionN(filter);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryOccludedFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryOccludedFilterFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setOcclusionFilterFunctionN(filter);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcInterpolate(const RTCInterpolateArguments* const args)
- {
- Geometry* geometry = (Geometry*) args->geometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcInterpolate);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(args->geometry);
-#endif
- geometry->interpolate(args);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcInterpolateN(const RTCInterpolateNArguments* const args)
- {
- Geometry* geometry = (Geometry*) args->geometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcInterpolateN);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(args->geometry);
-#endif
- geometry->interpolateN(args);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcCommitGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcCommitGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- return geometry->commit();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API unsigned int rtcAttachGeometry (RTCScene hscene, RTCGeometry hgeometry)
- {
- Scene* scene = (Scene*) hscene;
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcAttachGeometry);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(hgeometry);
- if (scene->device != geometry->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
- return scene->bind(RTC_INVALID_GEOMETRY_ID,geometry);
- RTC_CATCH_END2(scene);
- return -1;
- }
-
- RTC_API void rtcAttachGeometryByID (RTCScene hscene, RTCGeometry hgeometry, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcAttachGeometryByID);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_GEOMID(geomID);
- if (scene->device != geometry->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
- scene->bind(geomID,geometry);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcDetachGeometry (RTCScene hscene, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcDetachGeometry);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_GEOMID(geomID);
- scene->detachGeometry(geomID);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcRetainGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->refInc();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcReleaseGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->refDec();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API RTCGeometry rtcGetGeometry (RTCScene hscene, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometry);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_GEOMID(geomID);
-#endif
- return (RTCGeometry) scene->get(geomID);
- RTC_CATCH_END2(scene);
- return nullptr;
- }
-
-RTC_NAMESPACE_END
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.h b/thirdparty/embree-aarch64/kernels/common/rtcore.h
deleted file mode 100644
index 4b070e122b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../include/embree3/rtcore.h"
-RTC_NAMESPACE_USE
-
-namespace embree
-{
- /*! decoding of intersection flags */
- __forceinline bool isCoherent (RTCIntersectContextFlags flags) { return (flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT) == RTC_INTERSECT_CONTEXT_FLAG_COHERENT; }
- __forceinline bool isIncoherent(RTCIntersectContextFlags flags) { return (flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT) == RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT; }
-
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8)
-# define USE_TASK_ARENA 1
-#else
-# define USE_TASK_ARENA 0
-#endif
-
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9
-# define TASKING_TBB_USE_TASK_ISOLATION 1
-#else
-# define TASKING_TBB_USE_TASK_ISOLATION 0
-#endif
-
-/*! Macros used in the rtcore API implementation */
-// -- GODOT start --
-// #define RTC_CATCH_BEGIN try {
-#define RTC_CATCH_BEGIN
-
-// #define RTC_CATCH_END(device) \
-// } catch (std::bad_alloc&) { \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
-#define RTC_CATCH_END(device)
-
-// #define RTC_CATCH_END2(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
-#define RTC_CATCH_END2(scene)
-
-// #define RTC_CATCH_END2_FALSE(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// return false; \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// return false; \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// return false; \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// return false; \
-// }
-#define RTC_CATCH_END2_FALSE(scene) return false;
-// -- GODOT end --
-
-#define RTC_VERIFY_HANDLE(handle) \
- if (handle == nullptr) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_GEOMID(id) \
- if (id == RTC_INVALID_GEOMETRY_ID) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_UPPER(id,upper) \
- if (id > upper) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_RANGE(id,lower,upper) \
- if (id < lower || id > upper) \
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds");
-
-#if 0 // enable to debug print all API calls
-#define RTC_TRACE(x) std::cout << #x << std::endl;
-#else
-#define RTC_TRACE(x)
-#endif
-
-// -- GODOT begin --
-// /*! used to throw embree API errors */
-// struct rtcore_error : public std::exception
-// {
-// __forceinline rtcore_error(RTCError error, const std::string& str)
-// : error(error), str(str) {}
-//
-// ~rtcore_error() throw() {}
-//
-// const char* what () const throw () {
-// return str.c_str();
-// }
-//
-// RTCError error;
-// std::string str;
-// };
-// -- GODOT end --
-
-#if defined(DEBUG) // only report file and line in debug mode
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
- #define throw_RTCError(error,str) \
- printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort();
- // -- GODOT end --
-#else
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,str);
- #define throw_RTCError(error,str) \
- abort();
- // -- GODOT end --
-#endif
-
-#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
- (settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member)))
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp
deleted file mode 100644
index 6bb96bba07..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp
+++ /dev/null
@@ -1,442 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#define RTC_EXPORT_API
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "context.h"
-#include "alloc.h"
-
-#include "../builders/bvh_builder_sah.h"
-#include "../builders/bvh_builder_morton.h"
-
-namespace embree
-{
- namespace isa // FIXME: support more ISAs for builders
- {
- struct BVH : public RefCount
- {
- BVH (Device* device)
- : device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0)
- {
- device->refInc();
- }
-
- ~BVH() {
- device->refDec();
- }
-
- public:
- Device* device;
- FastAllocator allocator;
- mvector<BVHBuilderMorton::BuildPrim> morton_src;
- mvector<BVHBuilderMorton::BuildPrim> morton_tmp;
- };
-
- void* rtcBuildBVHMorton(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims_i = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- std::atomic<size_t> progress(0);
-
- /* initialize temporary arrays for morton builder */
- PrimRef* prims = (PrimRef*) prims_i;
- mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src;
- mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp;
- morton_src.resize(primitiveCount);
- morton_tmp.resize(primitiveCount);
-
- /* compute centroid bounds */
- const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa {
-
- BBox3fa bounds(empty);
- for (size_t i=r.begin(); i<r.end(); i++)
- bounds.extend(prims[i].bounds().center2());
- return bounds;
- }, BBox3fa::merge);
-
- /* compute morton codes */
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) {
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]);
- for (size_t i=r.begin(); i<r.end(); i++) {
- generator(prims[i].bounds(),(unsigned) i);
- }
- });
-
- /* start morton build */
- std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that allocates BVH nodes */
- [&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* {
- return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- },
-
- /* lambda function that sets bounds */
- [&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa>
- {
- BBox3fa bounds = empty;
- void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
- const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) {
- bounds.extend(children[i].second);
- childptrs[i] = children[i].first;
- cbounds[i] = (const RTCBounds*)&children[i].second;
- }
- setNodeBounds(node,cbounds,(unsigned int)N,userPtr);
- setNodeChildren(node,childptrs, (unsigned int)N,userPtr);
- return std::make_pair(node,bounds);
- },
-
- /* lambda function that creates BVH leaves */
- [&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa>
- {
- RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF];
- BBox3fa bounds = empty;
- for (size_t i=0;i<current.size();i++)
- {
- const size_t id = morton_src[current.begin()+i].index;
- bounds.extend(prims[id].bounds());
- localBuildPrims[i] = prims_i[id];
- }
- void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr);
- return std::make_pair(node,bounds);
- },
-
- /* lambda that calculates the bounds for some primitive */
- [&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa {
- return prims[morton.index].bounds();
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- morton_src.data(),morton_tmp.data(),primitiveCount,
- *arguments);
-
- bvh->allocator.cleanup();
- return root.first;
- }
-
- void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- std::atomic<size_t> progress(0);
-
- /* calculate priminfo */
- auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
- {
- CentGeomBBox3fa bounds(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- bounds.extend((BBox3fa&)prims[j]);
- return bounds;
- };
- const CentGeomBBox3fa bounds =
- parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
-
- const PrimInfo pinfo(0,primitiveCount,bounds);
-
- /* build BVH */
- void* root = BVHBuilderBinnedSAH::build<void*>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that creates BVH nodes */
- [&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
- {
- void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
- setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that updates BVH nodes */
- [&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
- setNodeChildren(node,children, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that creates BVH leaves */
- [&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
- return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- (PrimRef*)prims,pinfo,*arguments);
-
- bvh->allocator.cleanup();
- return root;
- }
-
- static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) {
- CentGeomBBox3fa centBounds = CentGeomBBox3fa::merge2(a.first,b.first);
- unsigned int maxGeomID = max(a.second,b.second);
- return std::pair<CentGeomBBox3fa,unsigned int>(centBounds,maxGeomID);
- }
-
- void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- std::atomic<size_t> progress(0);
-
- /* calculate priminfo */
-
- auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int>
- {
- CentGeomBBox3fa bounds(empty);
- unsigned maxGeomID = 0;
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- bounds.extend((BBox3fa&)prims[j]);
- maxGeomID = max(maxGeomID,prims[j].geomID);
- }
- return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID);
- };
-
-
- const std::pair<CentGeomBBox3fa,unsigned int> pair =
- parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair);
-
- CentGeomBBox3fa bounds = pair.first;
- const unsigned int maxGeomID = pair.second;
-
- if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))))
- {
- /* fallback code for max geomID larger than threshold */
- return rtcBuildBVHBinnedSAH(arguments);
- }
-
- const PrimInfo pinfo(0,primitiveCount,bounds);
-
- /* function that splits a build primitive */
- struct Splitter
- {
- Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr)
- : splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {}
-
- __forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const
- {
- prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK;
- splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
- left_o.geomIDref() = geomID; left_o.primIDref() = primID;
- right_o.geomIDref() = geomID; right_o.primIDref() = primID;
- }
-
- __forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const
- {
- PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID);
- splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
- }
-
- RTCSplitPrimitiveFunction splitPrimitive;
- unsigned geomID;
- unsigned primID;
- void* userPtr;
- };
-
- /* build BVH */
- void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that creates BVH nodes */
- [&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
- {
- void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
- setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that updates BVH nodes */
- [&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
- setNodeChildren(node,children, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that creates BVH leaves */
- [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
- return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
- },
-
- /* returns the splitter */
- [&] ( const PrimRef& prim ) -> Splitter {
- return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr);
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- (PrimRef*)prims,
- arguments->primitiveArrayCapacity,
- pinfo,*arguments);
-
- bvh->allocator.cleanup();
- return root;
- }
- }
-}
-
-using namespace embree;
-using namespace embree::isa;
-
-RTC_NAMESPACE_BEGIN
-
- RTC_API RTCBVH rtcNewBVH(RTCDevice device)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewAllocator);
- RTC_VERIFY_HANDLE(device);
- BVH* bvh = new BVH((Device*)device);
- return (RTCBVH) bvh->refInc();
- RTC_CATCH_END((Device*)device);
- return nullptr;
- }
-
- RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcBuildBVH);
- RTC_VERIFY_HANDLE(bvh);
- RTC_VERIFY_HANDLE(arguments);
- RTC_VERIFY_HANDLE(arguments->createNode);
- RTC_VERIFY_HANDLE(arguments->setNodeChildren);
- RTC_VERIFY_HANDLE(arguments->setNodeBounds);
- RTC_VERIFY_HANDLE(arguments->createLeaf);
-
- if (arguments->primitiveArrayCapacity < arguments->primitiveCount)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount")
-
- /* initialize the allocator */
- bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
- bvh->allocator.reset();
-
- /* switch between differnet builders based on quality level */
- if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
- return rtcBuildBVHMorton(arguments);
- else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
- return rtcBuildBVHBinnedSAH(arguments);
- else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) {
- if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount)
- return rtcBuildBVHBinnedSAH(arguments);
- else
- return rtcBuildBVHSpatialSAH(arguments);
- }
- else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality");
-
- /* if we are in dynamic mode, then do not clear temporary data */
- if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC))
- {
- bvh->morton_src.clear();
- bvh->morton_tmp.clear();
- }
-
- RTC_CATCH_END(bvh->device);
- return nullptr;
- }
-
- RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align)
- {
- FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcThreadLocalAlloc);
- return alloc->malloc0(bytes,align);
- RTC_CATCH_END(alloc->alloc->getDevice());
- return nullptr;
- }
-
- RTC_API void rtcMakeStaticBVH(RTCBVH hbvh)
- {
- BVH* bvh = (BVH*) hbvh;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcStaticBVH);
- RTC_VERIFY_HANDLE(hbvh);
- bvh->morton_src.clear();
- bvh->morton_tmp.clear();
- RTC_CATCH_END(bvh->device);
- }
-
- RTC_API void rtcRetainBVH(RTCBVH hbvh)
- {
- BVH* bvh = (BVH*) hbvh;
- Device* device = bvh ? bvh->device : nullptr;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainBVH);
- RTC_VERIFY_HANDLE(hbvh);
- bvh->refInc();
- RTC_CATCH_END(device);
- }
-
- RTC_API void rtcReleaseBVH(RTCBVH hbvh)
- {
- BVH* bvh = (BVH*) hbvh;
- Device* device = bvh ? bvh->device : nullptr;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseBVH);
- RTC_VERIFY_HANDLE(hbvh);
- bvh->refDec();
- RTC_CATCH_END(device);
- }
-
-RTC_NAMESPACE_END
diff --git a/thirdparty/embree-aarch64/kernels/common/scene.cpp b/thirdparty/embree-aarch64/kernels/common/scene.cpp
deleted file mode 100644
index 1e23aeb415..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene.cpp
+++ /dev/null
@@ -1,976 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "scene.h"
-
-#include "../bvh/bvh4_factory.h"
-#include "../bvh/bvh8_factory.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- /* error raising rtcIntersect and rtcOccluded functions */
- void missing_rtcCommit() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); }
- void invalid_rtcIntersect1() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect and rtcOccluded not enabled"); }
- void invalid_rtcIntersect4() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect4 and rtcOccluded4 not enabled"); }
- void invalid_rtcIntersect8() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect8 and rtcOccluded8 not enabled"); }
- void invalid_rtcIntersect16() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect16 and rtcOccluded16 not enabled"); }
- void invalid_rtcIntersectN() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectN and rtcOccludedN not enabled"); }
-
- Scene::Scene (Device* device)
- : device(device),
- flags_modified(true), enabled_geometry_types(0),
- scene_flags(RTC_SCENE_FLAG_NONE),
- quality_flags(RTC_BUILD_QUALITY_MEDIUM),
- is_build(false), modified(true),
- progressInterface(this), progress_monitor_function(nullptr), progress_monitor_ptr(nullptr), progress_monitor_counter(0)
- {
- device->refInc();
-
- intersectors = Accel::Intersectors(missing_rtcCommit);
-
- /* one can overwrite flags through device for debugging */
- if (device->quality_flags != -1)
- quality_flags = (RTCBuildQuality) device->quality_flags;
- if (device->scene_flags != -1)
- scene_flags = (RTCSceneFlags) device->scene_flags;
- }
-
- Scene::~Scene() noexcept
- {
- device->refDec();
- }
-
- void Scene::printStatistics()
- {
- /* calculate maximum number of time segments */
- unsigned max_time_steps = 0;
- for (size_t i=0; i<size(); i++) {
- if (!get(i)) continue;
- max_time_steps = max(max_time_steps,get(i)->numTimeSteps);
- }
-
- /* initialize vectors*/
- std::vector<size_t> statistics[Geometry::GTY_END];
- for (size_t i=0; i<Geometry::GTY_END; i++)
- statistics[i].resize(max_time_steps);
-
- /* gather statistics */
- for (size_t i=0; i<size(); i++)
- {
- if (!get(i)) continue;
- int ty = get(i)->getType();
- assert(ty<Geometry::GTY_END);
- int timesegments = get(i)->numTimeSegments();
- assert((unsigned int)timesegments < max_time_steps);
- statistics[ty][timesegments] += get(i)->size();
- }
-
- /* print statistics */
- std::cout << std::setw(23) << "segments" << ": ";
- for (size_t t=0; t<max_time_steps; t++)
- std::cout << std::setw(10) << t;
- std::cout << std::endl;
-
- std::cout << "-------------------------";
- for (size_t t=0; t<max_time_steps; t++)
- std::cout << "----------";
- std::cout << std::endl;
-
- for (size_t p=0; p<Geometry::GTY_END; p++)
- {
- if (std::string(Geometry::gtype_names[p]) == "") continue;
- std::cout << std::setw(23) << Geometry::gtype_names[p] << ": ";
- for (size_t t=0; t<max_time_steps; t++)
- std::cout << std::setw(10) << statistics[p][t];
- std::cout << std::endl;
- }
- }
-
- void Scene::createTriangleAccel()
- {
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- if (device->tri_accel == "default")
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW)
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- else
-#endif
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
-
- break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else /* dynamic */
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- }
- else if (device->tri_accel == "bvh4.triangle4") accels_add(device->bvh4_factory->BVH4Triangle4 (this));
- else if (device->tri_accel == "bvh4.triangle4v") accels_add(device->bvh4_factory->BVH4Triangle4v(this));
- else if (device->tri_accel == "bvh4.triangle4i") accels_add(device->bvh4_factory->BVH4Triangle4i(this));
- else if (device->tri_accel == "qbvh4.triangle4i") accels_add(device->bvh4_factory->BVH4QuantizedTriangle4i(this));
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->tri_accel == "bvh8.triangle4") accels_add(device->bvh8_factory->BVH8Triangle4 (this));
- else if (device->tri_accel == "bvh8.triangle4v") accels_add(device->bvh8_factory->BVH8Triangle4v(this));
- else if (device->tri_accel == "bvh8.triangle4i") accels_add(device->bvh8_factory->BVH8Triangle4i(this));
- else if (device->tri_accel == "qbvh8.triangle4i") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4i(this));
- else if (device->tri_accel == "qbvh8.triangle4") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown triangle acceleration structure "+device->tri_accel);
-#endif
- }
-
- void Scene::createTriangleMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- if (device->tri_accel_mb == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
-
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // BVH8 reduces performance on AVX only-machines
- {
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- else if (device->tri_accel_mb == "bvh4.triangle4imb") accels_add(device->bvh4_factory->BVH4Triangle4iMB(this));
- else if (device->tri_accel_mb == "bvh4.triangle4vmb") accels_add(device->bvh4_factory->BVH4Triangle4vMB(this));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->tri_accel_mb == "bvh8.triangle4imb") accels_add(device->bvh8_factory->BVH8Triangle4iMB(this));
- else if (device->tri_accel_mb == "bvh8.triangle4vmb") accels_add(device->bvh8_factory->BVH8Triangle4vMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur triangle acceleration structure "+device->tri_accel_mb);
-#endif
- }
-
- void Scene::createQuadAccel()
- {
-#if defined(EMBREE_GEOMETRY_QUAD)
- if (device->quad_accel == "default")
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW)
- {
- /* static */
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- else
-#endif
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- break;
-
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else /* dynamic */
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- }
- else if (device->quad_accel == "bvh4.quad4v") accels_add(device->bvh4_factory->BVH4Quad4v(this));
- else if (device->quad_accel == "bvh4.quad4i") accels_add(device->bvh4_factory->BVH4Quad4i(this));
- else if (device->quad_accel == "qbvh4.quad4i") accels_add(device->bvh4_factory->BVH4QuantizedQuad4i(this));
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->quad_accel == "bvh8.quad4v") accels_add(device->bvh8_factory->BVH8Quad4v(this));
- else if (device->quad_accel == "bvh8.quad4i") accels_add(device->bvh8_factory->BVH8Quad4i(this));
- else if (device->quad_accel == "qbvh8.quad4i") accels_add(device->bvh8_factory->BVH8QuantizedQuad4i(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad acceleration structure "+device->quad_accel);
-#endif
- }
-
- void Scene::createQuadMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_QUAD)
- if (device->quad_accel_mb == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- break;
-
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else if (device->quad_accel_mb == "bvh4.quad4imb") accels_add(device->bvh4_factory->BVH4Quad4iMB(this));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->quad_accel_mb == "bvh8.quad4imb") accels_add(device->bvh8_factory->BVH8Quad4iMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad motion blur acceleration structure "+device->quad_accel_mb);
-#endif
- }
-
- void Scene::createHairAccel()
- {
-#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
- if (device->hair_accel == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // only enable on HSW machines, for SNB this codepath is slower
- {
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- else if (device->hair_accel == "bvh4obb.virtualcurve4v" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST));
- else if (device->hair_accel == "bvh4obb.virtualcurve4i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->hair_accel == "bvh8obb.virtualcurve8v" ) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST));
- else if (device->hair_accel == "bvh4obb.virtualcurve8i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown hair acceleration structure "+device->hair_accel);
-#endif
- }
-
- void Scene::createHairMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
- if (device->hair_accel_mb == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // only enable on HSW machines, on SNB this codepath is slower
- {
- if (isRobustAccel()) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::ROBUST));
- else accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
- }
- else
-#endif
- {
- if (isRobustAccel()) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::ROBUST));
- else accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST));
- }
- }
- else if (device->hair_accel_mb == "bvh4.virtualcurve4imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST));
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->hair_accel_mb == "bvh4.virtualcurve8imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
- else if (device->hair_accel_mb == "bvh8.virtualcurve8imb") accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur hair acceleration structure "+device->hair_accel_mb);
-#endif
- }
-
- void Scene::createSubdivAccel()
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- if (device->subdiv_accel == "default") {
- accels_add(device->bvh4_factory->BVH4SubdivPatch1(this));
- }
- else if (device->subdiv_accel == "bvh4.grid.eager" ) accels_add(device->bvh4_factory->BVH4SubdivPatch1(this));
- else if (device->subdiv_accel == "bvh4.subdivpatch1eager" ) accels_add(device->bvh4_factory->BVH4SubdivPatch1(this));
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv accel "+device->subdiv_accel);
-#endif
- }
-
- void Scene::createSubdivMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- if (device->subdiv_accel_mb == "default") {
- accels_add(device->bvh4_factory->BVH4SubdivPatch1MB(this));
- }
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv mblur accel "+device->subdiv_accel_mb);
-#endif
- }
-
- void Scene::createUserGeometryAccel()
- {
-#if defined(EMBREE_GEOMETRY_USER)
- if (device->object_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel())
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- else if (device->object_accel == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometry(this));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->object_accel == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometry(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry accel "+device->object_accel);
-#endif
- }
-
- void Scene::createUserGeometryMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_USER)
- if (device->object_accel_mb == "default" ) {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel())
- accels_add(device->bvh8_factory->BVH8UserGeometryMB(this));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4UserGeometryMB(this));
- }
- else if (device->object_accel_mb == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometryMB(this));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->object_accel_mb == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometryMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry mblur accel "+device->object_accel_mb);
-#endif
- }
-
- void Scene::createInstanceAccel()
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- // if (device->object_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel);
-#endif
- }
-
- void Scene::createInstanceMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- //if (device->instance_accel_mb == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel())
- accels_add(device->bvh8_factory->BVH8InstanceMB(this, false));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4InstanceMB(this, false));
- }
- //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb);
-#endif
- }
-
- void Scene::createInstanceExpensiveAccel()
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- // if (device->object_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel);
-#endif
- }
-
- void Scene::createInstanceExpensiveMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- //if (device->instance_accel_mb == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel())
- accels_add(device->bvh8_factory->BVH8InstanceMB(this, true));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4InstanceMB(this, true));
- }
- //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb);
-#endif
- }
-
- void Scene::createGridAccel()
- {
- BVHFactory::IntersectVariant ivariant = isRobustAccel() ? BVHFactory::IntersectVariant::ROBUST : BVHFactory::IntersectVariant::FAST;
-#if defined(EMBREE_GEOMETRY_GRID)
- if (device->grid_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel())
- {
- accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
- }
- else
-#endif
- {
- accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
- }
- }
- else if (device->grid_accel == "bvh4.grid") accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->grid_accel == "bvh8.grid") accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid accel "+device->grid_accel);
-#endif
-
- }
-
- void Scene::createGridMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_GRID)
- if (device->grid_accel_mb == "default")
- {
- accels_add(device->bvh4_factory->BVH4GridMB(this,BVHFactory::BuildVariant::STATIC));
- }
- else if (device->grid_accel_mb == "bvh4mb.grid") accels_add(device->bvh4_factory->BVH4GridMB(this));
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid mb accel "+device->grid_accel);
-#endif
-
- }
-
- void Scene::clear() {
- }
-
- unsigned Scene::bind(unsigned geomID, Ref<Geometry> geometry)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(geometriesMutex);
-#else
- Lock<SpinLock> lock(geometriesMutex);
-#endif
- if (geomID == RTC_INVALID_GEOMETRY_ID) {
- geomID = id_pool.allocate();
- if (geomID == RTC_INVALID_GEOMETRY_ID)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"too many geometries inside scene");
- }
- else
- {
- if (!id_pool.add(geomID))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID provided");
- }
- if (geomID >= geometries.size()) {
- geometries.resize(geomID+1);
- vertices.resize(geomID+1);
- geometryModCounters_.resize(geomID+1);
- }
- geometries[geomID] = geometry;
- geometryModCounters_[geomID] = 0;
- if (geometry->isEnabled()) {
- setModified ();
- }
- return geomID;
- }
-
- void Scene::detachGeometry(size_t geomID)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(geometriesMutex);
-#else
- Lock<SpinLock> lock(geometriesMutex);
-#endif
-
- if (geomID >= geometries.size())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID");
-
- Ref<Geometry>& geometry = geometries[geomID];
- if (geometry == null)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry");
-
- if (geometry->isEnabled()) {
- setModified ();
- }
- accels_deleteGeometry(unsigned(geomID));
- id_pool.deallocate((unsigned)geomID);
- geometries[geomID] = null;
- vertices[geomID] = nullptr;
- geometryModCounters_[geomID] = 0;
- }
-
- void Scene::updateInterface()
- {
- is_build = true;
- }
-
- void Scene::commit_task ()
- {
- checkIfModifiedAndSet ();
- if (!isModified()) {
- return;
- }
-
- /* print scene statistics */
- if (device->verbosity(2))
- printStatistics();
-
- progress_monitor_counter = 0;
-
- /* gather scene stats and call preCommit function of each geometry */
- this->world = parallel_reduce (size_t(0), geometries.size(), GeometryCounts (),
- [this](const range<size_t>& r)->GeometryCounts
- {
- GeometryCounts c;
- for (auto i=r.begin(); i<r.end(); ++i)
- {
- if (geometries[i] && geometries[i]->isEnabled())
- {
- geometries[i]->preCommit();
- geometries[i]->addElementsToCount (c);
- c.numFilterFunctions += (int) geometries[i]->hasFilterFunctions();
- }
- }
- return c;
- },
- std::plus<GeometryCounts>()
- );
-
- /* select acceleration structures to build */
- unsigned int new_enabled_geometry_types = world.enabledGeometryTypesMask();
- if (flags_modified || new_enabled_geometry_types != enabled_geometry_types)
- {
- accels_init();
-
- /* we need to make all geometries modified, otherwise two level builder will
- not rebuild currently not modified geometries */
- parallel_for(geometryModCounters_.size(), [&] ( const size_t i ) {
- geometryModCounters_[i] = 0;
- });
-
- if (getNumPrimitives(TriangleMesh::geom_type,false)) createTriangleAccel();
- if (getNumPrimitives(TriangleMesh::geom_type,true)) createTriangleMBAccel();
- if (getNumPrimitives(QuadMesh::geom_type,false)) createQuadAccel();
- if (getNumPrimitives(QuadMesh::geom_type,true)) createQuadMBAccel();
- if (getNumPrimitives(GridMesh::geom_type,false)) createGridAccel();
- if (getNumPrimitives(GridMesh::geom_type,true)) createGridMBAccel();
- if (getNumPrimitives(SubdivMesh::geom_type,false)) createSubdivAccel();
- if (getNumPrimitives(SubdivMesh::geom_type,true)) createSubdivMBAccel();
- if (getNumPrimitives(Geometry::MTY_CURVES,false)) createHairAccel();
- if (getNumPrimitives(Geometry::MTY_CURVES,true)) createHairMBAccel();
- if (getNumPrimitives(UserGeometry::geom_type,false)) createUserGeometryAccel();
- if (getNumPrimitives(UserGeometry::geom_type,true)) createUserGeometryMBAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,false)) createInstanceAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,true)) createInstanceMBAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,false)) createInstanceExpensiveAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,true)) createInstanceExpensiveMBAccel();
-
- flags_modified = false;
- enabled_geometry_types = new_enabled_geometry_types;
- }
-
- /* select fast code path if no filter function is present */
- accels_select(hasFilterFunction());
-
- /* build all hierarchies of this scene */
- accels_build();
-
- /* make static geometry immutable */
- if (!isDynamicAccel()) {
- accels_immutable();
- flags_modified = true; // in non-dynamic mode we have to re-create accels
- }
-
- /* call postCommit function of each geometry */
- parallel_for(geometries.size(), [&] ( const size_t i ) {
- if (geometries[i] && geometries[i]->isEnabled()) {
- geometries[i]->postCommit();
- vertices[i] = geometries[i]->getCompactVertexArray();
- geometryModCounters_[i] = geometries[i]->getModCounter();
- }
- });
-
- updateInterface();
-
- if (device->verbosity(2)) {
- std::cout << "created scene intersector" << std::endl;
- accels_print(2);
- std::cout << "selected scene intersector" << std::endl;
- intersectors.print(2);
- }
-
- setModified(false);
- }
-
- void Scene::setBuildQuality(RTCBuildQuality quality_flags_i)
- {
- if (quality_flags == quality_flags_i) return;
- quality_flags = quality_flags_i;
- flags_modified = true;
- }
-
- RTCBuildQuality Scene::getBuildQuality() const {
- return quality_flags;
- }
-
- void Scene::setSceneFlags(RTCSceneFlags scene_flags_i)
- {
- if (scene_flags == scene_flags_i) return;
- scene_flags = scene_flags_i;
- flags_modified = true;
- }
-
- RTCSceneFlags Scene::getSceneFlags() const {
- return scene_flags;
- }
-
-#if defined(TASKING_INTERNAL)
-
- void Scene::commit (bool join)
- {
- Lock<MutexSys> buildLock(buildMutex,false);
-
- /* allocates own taskscheduler for each build */
- Ref<TaskScheduler> scheduler = nullptr;
- {
- Lock<MutexSys> lock(schedulerMutex);
- scheduler = this->scheduler;
- if (scheduler == null) {
- buildLock.lock();
- this->scheduler = scheduler = new TaskScheduler;
- }
- }
-
- /* worker threads join build */
- if (!buildLock.isLocked())
- {
- if (!join)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"use rtcJoinCommitScene to join a build operation");
-
- scheduler->join();
- return;
- }
-
- /* initiate build */
- // -- GODOT start --
- // try {
- scheduler->spawn_root([&]() { commit_task(); Lock<MutexSys> lock(schedulerMutex); this->scheduler = nullptr; }, 1, !join);
- // }
- // catch (...) {
- // accels_clear();
- // updateInterface();
- // Lock<MutexSys> lock(schedulerMutex);
- // this->scheduler = nullptr;
- // throw;
- // }
- // -- GODOT end --
- }
-
-#endif
-
-#if defined(TASKING_TBB) || defined(TASKING_GCD)
-
- void Scene::commit (bool join)
- {
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
- if (join)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with this TBB version");
-#endif
-
- /* try to obtain build lock */
- Lock<MutexSys> lock(buildMutex,buildMutex.try_lock());
-
- /* join hierarchy build */
- if (!lock.isLocked())
- {
-#if !TASKING_TBB_USE_TASK_ISOLATION
- if (!join)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invoking rtcCommitScene from multiple threads is not supported with this TBB version");
-#endif
-
- do {
-
-#if defined(TASKING_GCD)
- // Do Nothing
-#else
-#if USE_TASK_ARENA
- if (join) {
- device->arena->execute([&]{ group.wait(); });
- }
- else
-#endif
- {
- group.wait();
- }
-#endif
-
- pause_cpu();
- yield();
-
- } while (!buildMutex.try_lock());
-
- buildMutex.unlock();
- return;
- }
-
- /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */
- const unsigned int mxcsr = _mm_getcsr();
- _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6));
-
- try {
-#if defined(TASKING_TBB)
-#if TBB_INTERFACE_VERSION_MAJOR < 8
- tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits);
-#else
- tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits | tbb::task_group_context::fp_settings );
-#endif
- //ctx.set_priority(tbb::priority_high);
-
-#if USE_TASK_ARENA
- if (join)
- {
- device->arena->execute([&]{
- group.run([&]{
- tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx);
- });
- group.wait();
- });
- }
- else
-#endif
- {
- group.run([&]{
- tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx);
- });
- group.wait();
- }
-
- /* reset MXCSR register again */
- _mm_setcsr(mxcsr);
-
-#elif defined(TASKING_GCD)
-
- commit_task();
-
-#endif // #if defined(TASKING_TBB)
-
- }
- catch (...)
- {
- /* reset MXCSR register again */
- _mm_setcsr(mxcsr);
-
- accels_clear();
- updateInterface();
- throw;
- }
- }
-#endif
-
-#if defined(TASKING_PPL)
-
- void Scene::commit (bool join)
- {
-#if defined(TASKING_PPL)
- if (join)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with PPL");
-#endif
-
- /* try to obtain build lock */
- Lock<MutexSys> lock(buildMutex);
-
- checkIfModifiedAndSet ();
- if (!isModified()) {
- return;
- }
-
- /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */
- const unsigned int mxcsr = _mm_getcsr();
- _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6));
-
- try {
-
- group.run([&]{
- concurrency::parallel_for(size_t(0), size_t(1), size_t(1), [&](size_t) { commit_task(); });
- });
- group.wait();
-
- /* reset MXCSR register again */
- _mm_setcsr(mxcsr);
- }
- catch (...)
- {
- /* reset MXCSR register again */
- _mm_setcsr(mxcsr);
-
- accels_clear();
- updateInterface();
- throw;
- }
- }
-#endif
-
- void Scene::setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr)
- {
- progress_monitor_function = func;
- progress_monitor_ptr = ptr;
- }
-
- void Scene::progressMonitor(double dn)
- {
- if (progress_monitor_function) {
- size_t n = size_t(dn) + progress_monitor_counter.fetch_add(size_t(dn));
- if (!progress_monitor_function(progress_monitor_ptr, n / (double(numPrimitives())))) {
- throw_RTCError(RTC_ERROR_CANCELLED,"progress monitor forced termination");
- }
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene.h b/thirdparty/embree-aarch64/kernels/common/scene.h
deleted file mode 100644
index b41c6cde91..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene.h
+++ /dev/null
@@ -1,390 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "builder.h"
-#include "../../common/algorithms/parallel_any_of.h"
-#include "scene_triangle_mesh.h"
-#include "scene_quad_mesh.h"
-#include "scene_user_geometry.h"
-#include "scene_instance.h"
-#include "scene_curves.h"
-#include "scene_line_segments.h"
-#include "scene_subdiv_mesh.h"
-#include "scene_grid_mesh.h"
-#include "scene_points.h"
-#include "../subdiv/tessellation_cache.h"
-
-#include "acceln.h"
-#include "geometry.h"
-
-namespace embree
-{
- /*! Base class all scenes are derived from */
- class Scene : public AccelN
- {
- ALIGNED_CLASS_(std::alignment_of<Scene>::value);
-
- public:
- template<typename Ty, bool mblur = false>
- class Iterator
- {
- public:
- Iterator () {}
-
- Iterator (Scene* scene, bool all = false)
- : scene(scene), all(all) {}
-
- __forceinline Ty* at(const size_t i)
- {
- Geometry* geom = scene->geometries[i].ptr;
- if (geom == nullptr) return nullptr;
- if (!all && !geom->isEnabled()) return nullptr;
- const size_t mask = geom->getTypeMask() & Ty::geom_type;
- if (!(mask)) return nullptr;
- if ((geom->numTimeSteps != 1) != mblur) return nullptr;
- return (Ty*) geom;
- }
-
- __forceinline Ty* operator[] (const size_t i) {
- return at(i);
- }
-
- __forceinline size_t size() const {
- return scene->size();
- }
-
- __forceinline size_t numPrimitives() const {
- return scene->getNumPrimitives(Ty::geom_type,mblur);
- }
-
- __forceinline size_t maxPrimitivesPerGeometry()
- {
- size_t ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,mesh->size());
- }
- return ret;
- }
-
- __forceinline unsigned int maxGeomID()
- {
- unsigned int ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,(unsigned int)i);
- }
- return ret;
- }
-
- __forceinline unsigned maxTimeStepsPerGeometry()
- {
- unsigned ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,mesh->numTimeSteps);
- }
- return ret;
- }
-
- private:
- Scene* scene;
- bool all;
- };
-
- class Iterator2
- {
- public:
- Iterator2 () {}
-
- Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur)
- : scene(scene), typemask(typemask), mblur(mblur) {}
-
- __forceinline Geometry* at(const size_t i)
- {
- Geometry* geom = scene->geometries[i].ptr;
- if (geom == nullptr) return nullptr;
- if (!geom->isEnabled()) return nullptr;
- if (!(geom->getTypeMask() & typemask)) return nullptr;
- if ((geom->numTimeSteps != 1) != mblur) return nullptr;
- return geom;
- }
-
- __forceinline Geometry* operator[] (const size_t i) {
- return at(i);
- }
-
- __forceinline size_t size() const {
- return scene->size();
- }
-
- private:
- Scene* scene;
- Geometry::GTypeMask typemask;
- bool mblur;
- };
-
- public:
-
- /*! Scene construction */
- Scene (Device* device);
-
- /*! Scene destruction */
- ~Scene () noexcept;
-
- private:
- /*! class is non-copyable */
- Scene (const Scene& other) DELETED; // do not implement
- Scene& operator= (const Scene& other) DELETED; // do not implement
-
- public:
- void createTriangleAccel();
- void createTriangleMBAccel();
- void createQuadAccel();
- void createQuadMBAccel();
- void createHairAccel();
- void createHairMBAccel();
- void createSubdivAccel();
- void createSubdivMBAccel();
- void createUserGeometryAccel();
- void createUserGeometryMBAccel();
- void createInstanceAccel();
- void createInstanceMBAccel();
- void createInstanceExpensiveAccel();
- void createInstanceExpensiveMBAccel();
- void createGridAccel();
- void createGridMBAccel();
-
- /*! prints statistics about the scene */
- void printStatistics();
-
- /*! clears the scene */
- void clear();
-
- /*! detaches some geometry */
- void detachGeometry(size_t geomID);
-
- void setBuildQuality(RTCBuildQuality quality_flags);
- RTCBuildQuality getBuildQuality() const;
-
- void setSceneFlags(RTCSceneFlags scene_flags);
- RTCSceneFlags getSceneFlags() const;
-
- void commit (bool join);
- void commit_task ();
- void build () {}
-
- void updateInterface();
-
- /* return number of geometries */
- __forceinline size_t size() const { return geometries.size(); }
-
- /* bind geometry to the scene */
- unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
-
- /* determines if scene is modified */
- __forceinline bool isModified() const { return modified; }
-
- /* sets modified flag */
- __forceinline void setModified(bool f = true) {
- modified = f;
- }
-
- __forceinline bool isGeometryModified(size_t geomID)
- {
- Ref<Geometry>& g = geometries[geomID];
- if (!g) return false;
- return g->getModCounter() > geometryModCounters_[geomID];
- }
-
- protected:
-
- __forceinline void checkIfModifiedAndSet ()
- {
- if (isModified ()) return;
-
- auto geometryIsModified = [this](size_t geomID)->bool {
- return isGeometryModified(geomID);
- };
-
- if (parallel_any_of (size_t(0), geometries.size (), geometryIsModified)) {
- setModified ();
- }
- }
-
- public:
-
- /* get mesh by ID */
- __forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; }
- __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }
-
- template<typename Mesh>
- __forceinline Mesh* get(size_t i) {
- assert(i < geometries.size());
- assert(geometries[i]->getTypeMask() & Mesh::geom_type);
- return (Mesh*)geometries[i].ptr;
- }
- template<typename Mesh>
- __forceinline const Mesh* get(size_t i) const {
- assert(i < geometries.size());
- assert(geometries[i]->getTypeMask() & Mesh::geom_type);
- return (Mesh*)geometries[i].ptr;
- }
-
- template<typename Mesh>
- __forceinline Mesh* getSafe(size_t i) {
- assert(i < geometries.size());
- if (geometries[i] == null) return nullptr;
- if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
- else return (Mesh*) geometries[i].ptr;
- }
-
- __forceinline Ref<Geometry> get_locked(size_t i) {
- Lock<SpinLock> lock(geometriesMutex);
- assert(i < geometries.size());
- return geometries[i];
- }
-
- /* flag decoding */
- __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
- __forceinline bool isCompactAccel() const { return scene_flags & RTC_SCENE_FLAG_COMPACT; }
- __forceinline bool isRobustAccel() const { return scene_flags & RTC_SCENE_FLAG_ROBUST; }
- __forceinline bool isStaticAccel() const { return !(scene_flags & RTC_SCENE_FLAG_DYNAMIC); }
- __forceinline bool isDynamicAccel() const { return scene_flags & RTC_SCENE_FLAG_DYNAMIC; }
-
- __forceinline bool hasContextFilterFunction() const {
- return scene_flags & RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION;
- }
-
- __forceinline bool hasGeometryFilterFunction() {
- return world.numFilterFunctions != 0;
- }
-
- __forceinline bool hasFilterFunction() {
- return hasContextFilterFunction() || hasGeometryFilterFunction();
- }
-
- /* test if scene got already build */
- __forceinline bool isBuild() const { return is_build; }
-
- public:
- IDPool<unsigned,0xFFFFFFFE> id_pool;
- vector<Ref<Geometry>> geometries; //!< list of all user geometries
- vector<unsigned int> geometryModCounters_;
- vector<float*> vertices;
-
- public:
- Device* device;
-
- /* these are to detect if we need to recreate the acceleration structures */
- bool flags_modified;
- unsigned int enabled_geometry_types;
-
- RTCSceneFlags scene_flags;
- RTCBuildQuality quality_flags;
- MutexSys buildMutex;
- SpinLock geometriesMutex;
- bool is_build;
- private:
- bool modified; //!< true if scene got modified
-
- public:
-
- /*! global lock step task scheduler */
-#if defined(TASKING_INTERNAL)
- MutexSys schedulerMutex;
- Ref<TaskScheduler> scheduler;
-#elif defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
- tbb::isolated_task_group group;
-#elif defined(TASKING_TBB)
- tbb::task_group group;
-#elif defined(TASKING_PPL)
- concurrency::task_group group;
-#endif
-
- public:
- struct BuildProgressMonitorInterface : public BuildProgressMonitor {
- BuildProgressMonitorInterface(Scene* scene)
- : scene(scene) {}
- void operator() (size_t dn) const { scene->progressMonitor(double(dn)); }
- private:
- Scene* scene;
- };
- BuildProgressMonitorInterface progressInterface;
- RTCProgressMonitorFunction progress_monitor_function;
- void* progress_monitor_ptr;
- std::atomic<size_t> progress_monitor_counter;
- void progressMonitor(double nprims);
- void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr);
-
- private:
- GeometryCounts world; //!< counts for geometry
-
- public:
-
- __forceinline size_t numPrimitives() const {
- return world.size();
- }
-
- __forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const
- {
- size_t count = 0;
-
- if (mask & Geometry::MTY_TRIANGLE_MESH)
- count += mblur ? world.numMBTriangles : world.numTriangles;
-
- if (mask & Geometry::MTY_QUAD_MESH)
- count += mblur ? world.numMBQuads : world.numQuads;
-
- if (mask & Geometry::MTY_CURVE2)
- count += mblur ? world.numMBLineSegments : world.numLineSegments;
-
- if (mask & Geometry::MTY_CURVE4)
- count += mblur ? world.numMBBezierCurves : world.numBezierCurves;
-
- if (mask & Geometry::MTY_POINTS)
- count += mblur ? world.numMBPoints : world.numPoints;
-
- if (mask & Geometry::MTY_SUBDIV_MESH)
- count += mblur ? world.numMBSubdivPatches : world.numSubdivPatches;
-
- if (mask & Geometry::MTY_USER_GEOMETRY)
- count += mblur ? world.numMBUserGeometries : world.numUserGeometries;
-
- if (mask & Geometry::MTY_INSTANCE_CHEAP)
- count += mblur ? world.numMBInstancesCheap : world.numInstancesCheap;
-
- if (mask & Geometry::MTY_INSTANCE_EXPENSIVE)
- count += mblur ? world.numMBInstancesExpensive : world.numInstancesExpensive;
-
- if (mask & Geometry::MTY_GRID_MESH)
- count += mblur ? world.numMBGrids : world.numGrids;
-
- return count;
- }
-
- template<typename Mesh, bool mblur>
- __forceinline unsigned getNumTimeSteps()
- {
- if (!mblur)
- return 1;
-
- Scene::Iterator<Mesh,mblur> iter(this);
- return iter.maxTimeStepsPerGeometry();
- }
-
- template<typename Mesh, bool mblur>
- __forceinline unsigned int getMaxGeomID()
- {
- Scene::Iterator<Mesh,mblur> iter(this);
- return iter.maxGeomID();
- }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_curves.h b/thirdparty/embree-aarch64/kernels/common/scene_curves.h
deleted file mode 100644
index 2649ab0e3e..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_curves.h
+++ /dev/null
@@ -1,341 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! represents an array of bicubic bezier curves */
- struct CurveGeometry : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4;
-
- public:
-
- /*! bezier curve construction */
- CurveGeometry (Device* device, Geometry::GType gtype);
-
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void setTessellationRate(float N);
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns the i'th curve */
- __forceinline const unsigned int& curve(size_t i) const {
- return curves[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th tangent of the first time step */
- __forceinline Vec3ff tangent(size_t i) const {
- return tangents0[i];
- }
-
- /*! returns i'th normal derivative of the first time step */
- __forceinline Vec3fa dnormal(size_t i) const {
- return dnormals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w;
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th tangent of itime'th timestep */
- __forceinline Vec3ff tangent(size_t i, size_t itime) const {
- return tangents[itime][i];
- }
-
- /*! returns i'th normal derivative of itime'th timestep */
- __forceinline Vec3fa dnormal(size_t i, size_t itime) const {
- return dnormals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! gathers the curve starting with i'th vertex */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const
- {
- p0 = vertex(i+0);
- p1 = vertex(i+1);
- p2 = vertex(i+2);
- p3 = vertex(i+3);
- }
-
- /*! gathers the curve starting with i'th vertex of itime'th timestep */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const
- {
- p0 = vertex(i+0,itime);
- p1 = vertex(i+1,itime);
- p2 = vertex(i+2,itime);
- p3 = vertex(i+3,itime);
- }
-
- /*! gathers the curve starting with i'th vertex */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
- {
- p0 = vertex(i+0);
- p1 = vertex(i+1);
- p2 = vertex(i+2);
- p3 = vertex(i+3);
- n0 = normal(i+0);
- n1 = normal(i+1);
- n2 = normal(i+2);
- n3 = normal(i+3);
- }
-
- /*! gathers the curve starting with i'th vertex of itime'th timestep */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const
- {
- p0 = vertex(i+0,itime);
- p1 = vertex(i+1,itime);
- p2 = vertex(i+2,itime);
- p3 = vertex(i+3,itime);
- n0 = normal(i+0,itime);
- n1 = normal(i+1,itime);
- n2 = normal(i+2,itime);
- n3 = normal(i+3,itime);
- }
-
- /*! prefetches the curve starting with i'th vertex of itime'th timestep */
- __forceinline void prefetchL1_vertices(size_t i) const
- {
- prefetchL1(vertices0.getPtr(i)+0);
- prefetchL1(vertices0.getPtr(i)+64);
- }
-
- /*! prefetches the curve starting with i'th vertex of itime'th timestep */
- __forceinline void prefetchL2_vertices(size_t i) const
- {
- prefetchL2(vertices0.getPtr(i)+0);
- prefetchL2(vertices0.getPtr(i)+64);
- }
-
- /*! loads curve vertices for specified time */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
-
- const float t0 = 1.0f - ftime;
- const float t1 = ftime;
- Vec3ff a0,a1,a2,a3;
- gather(a0,a1,a2,a3,i,itime);
- Vec3ff b0,b1,b2,b3;
- gather(b0,b1,b2,b3,i,itime+1);
- p0 = madd(Vec3ff(t0),a0,t1*b0);
- p1 = madd(Vec3ff(t0),a1,t1*b1);
- p2 = madd(Vec3ff(t0),a2,t1*b2);
- p3 = madd(Vec3ff(t0),a3,t1*b3);
- }
-
- /*! loads curve vertices for specified time */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
-
- const float t0 = 1.0f - ftime;
- const float t1 = ftime;
- Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3;
- gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime);
- Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3;
- gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1);
- p0 = madd(Vec3ff(t0),a0,t1*b0);
- p1 = madd(Vec3ff(t0),a1,t1*b1);
- p2 = madd(Vec3ff(t0),a2,t1*b2);
- p3 = madd(Vec3ff(t0),a3,t1*b3);
- n0 = madd(Vec3ff(t0),an0,t1*bn0);
- n1 = madd(Vec3ff(t0),an1,t1*bn1);
- n2 = madd(Vec3ff(t0),an2,t1*bn2);
- n3 = madd(Vec3ff(t0),an3,t1*bn3);
- }
-
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
- {
- Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3;
- unsigned int vertexID = curve(primID);
- gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime);
- SourceCurve3ff ccurve(v0,v1,v2,v3);
- SourceCurve3fa ncurve(n0,n1,n2,n3);
- ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
- return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- }
-
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
- const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
- return clerp(curve0,curve1,ftime);
- }
-
- /*! gathers the hermite curve starting with i'th vertex */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const
- {
- p0 = vertex (i+0);
- p1 = vertex (i+1);
- t0 = tangent(i+0);
- t1 = tangent(i+1);
- }
-
- /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const
- {
- p0 = vertex (i+0,itime);
- p1 = vertex (i+1,itime);
- t0 = tangent(i+0,itime);
- t1 = tangent(i+1,itime);
- }
-
- /*! loads curve vertices for specified time */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const float f0 = 1.0f - ftime, f1 = ftime;
- Vec3ff ap0,at0,ap1,at1;
- gather_hermite(ap0,at0,ap1,at1,i,itime);
- Vec3ff bp0,bt0,bp1,bt1;
- gather_hermite(bp0,bt0,bp1,bt1,i,itime+1);
- p0 = madd(Vec3ff(f0),ap0,f1*bp0);
- t0 = madd(Vec3ff(f0),at0,f1*bt0);
- p1 = madd(Vec3ff(f0),ap1,f1*bp1);
- t1 = madd(Vec3ff(f0),at1,f1*bt1);
- }
-
- /*! gathers the hermite curve starting with i'th vertex */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const
- {
- p0 = vertex (i+0);
- p1 = vertex (i+1);
- t0 = tangent(i+0);
- t1 = tangent(i+1);
- n0 = normal(i+0);
- n1 = normal(i+1);
- dn0 = dnormal(i+0);
- dn1 = dnormal(i+1);
- }
-
- /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const
- {
- p0 = vertex (i+0,itime);
- p1 = vertex (i+1,itime);
- t0 = tangent(i+0,itime);
- t1 = tangent(i+1,itime);
- n0 = normal(i+0,itime);
- n1 = normal(i+1,itime);
- dn0 = dnormal(i+0,itime);
- dn1 = dnormal(i+1,itime);
- }
-
- /*! loads curve vertices for specified time */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3fa& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3fa& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const float f0 = 1.0f - ftime, f1 = ftime;
- Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1;
- gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime);
- Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1;
- gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1);
- p0 = madd(Vec3ff(f0),ap0,f1*bp0);
- t0 = madd(Vec3ff(f0),at0,f1*bt0);
- n0 = madd(Vec3ff(f0),an0,f1*bn0);
- dn0= madd(Vec3ff(f0),adn0,f1*bdn0);
- p1 = madd(Vec3ff(f0),ap1,f1*bp1);
- t1 = madd(Vec3ff(f0),at1,f1*bt1);
- n1 = madd(Vec3ff(f0),an1,f1*bn1);
- dn1= madd(Vec3ff(f0),adn1,f1*bdn1);
- }
-
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
- {
- Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1;
- unsigned int vertexID = curve(primID);
- gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime);
-
- SourceCurve3ff ccurve(v0,t0,v1,t1);
- SourceCurve3fa ncurve(n0,dn0,n1,dn1);
- ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
- return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- }
-
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
- const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
- return clerp(curve0,curve1,ftime);
- }
-
- private:
- void resizeBuffers(unsigned int numSteps);
-
- public:
- BufferView<unsigned int> curves; //!< array of curve indices
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer
- BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<Vec3ff>> tangents; //!< tangent array for each timestep
- vector<BufferView<Vec3fa>> dnormals; //!< normal derivative array for each timestep
- BufferView<char> flags; //!< start, end flag per segment
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- int tessellationRate; //!< tessellation rate for flat curve
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h
deleted file mode 100644
index c08658466a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Grid Mesh */
- struct GridMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
-
- /*! grid */
- struct Grid
- {
- unsigned int startVtxID;
- unsigned int lineVtxOffset;
- unsigned short resX,resY;
-
- /* border flags due to 3x3 vertex pattern */
- __forceinline unsigned int get3x3FlagsX(const unsigned int x) const
- {
- return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0;
- }
-
- /* border flags due to 3x3 vertex pattern */
- __forceinline unsigned int get3x3FlagsY(const unsigned int y) const
- {
- return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0;
- }
-
- /*! outputs grid structure */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
- return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }";
- }
- };
-
- public:
-
- /*! grid mesh construction */
- GridMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
-
- __forceinline unsigned int getNumSubGrids(const size_t gridID)
- {
- const Grid &g = grid(gridID);
- return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1));
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th grid*/
- __forceinline const Grid& grid(size_t i) const {
- return grids[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns i'th vertex of the first timestep */
- __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
- assert(x < (size_t)g.resX);
- assert(y < (size_t)g.resY);
- return g.startVtxID + x + y * g.lineVtxOffset;
- }
-
- /*! returns i'th vertex of the first timestep */
- __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
- const size_t index = grid_vertex_index(g,x,y);
- return vertex(index);
- }
-
- /*! returns i'th vertex of the itime'th timestep */
- __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
- const size_t index = grid_vertex_index(g,x,y);
- return vertex(index,itime);
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
- {
- BBox3fa b(empty);
- for (size_t t=0; t<numTimeSteps; t++)
- {
- for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
- for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
- {
- const Vec3fa v = grid_vertex(g,x,y,t);
- if (unlikely(!isvalid(v))) return false;
- b.extend(v);
- }
- }
-
- bbox = b;
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
- {
- assert(itime < numTimeSteps);
- BBox3fa b0(empty);
- for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
- for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
- {
- const Vec3fa v = grid_vertex(g,x,y,itime);
- if (unlikely(!isvalid(v))) return false;
- b0.extend(v);
- }
-
- /* use bounds of first time step in builder */
- bbox = b0;
- return true;
- }
-
- __forceinline bool valid(size_t gridID, size_t itime=0) const {
- return valid(gridID, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
- {
- if (unlikely(gridID >= grids.size())) return false;
- const Grid &g = grid(gridID);
- if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false;
- if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false;
-
- for (size_t y=0;y<g.resY;y++)
- for (size_t x=0;x<g.resX;x++)
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- if (!isvalid(grid_vertex(g,x,y,itime))) return false;
- return true;
- }
-
-
- __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
- {
- BBox3fa box(empty);
- buildBounds(g,sx,sy,itime,box);
- return box;
- }
-
- __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
- BBox3fa bounds0, bounds1;
- buildBounds(g,sx,sy,itime+0,bounds0);
- buildBounds(g,sx,sy,itime+1,bounds1);
- return LBBox3fa(bounds0,bounds1);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments);
- }
-
- public:
- BufferView<Grid> grids; //!< array of triangles
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<RawBufferView> vertexAttribs; //!< vertex attributes
- };
-
- namespace isa
- {
- struct GridMeshISA : public GridMesh
- {
- GridMeshISA (Device* device)
- : GridMesh(device) {}
- };
- }
-
- DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_instance.h b/thirdparty/embree-aarch64/kernels/common/scene_instance.h
deleted file mode 100644
index 7ff82a4fb8..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_instance.h
+++ /dev/null
@@ -1,272 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "accel.h"
-
-namespace embree
-{
- struct MotionDerivativeCoefficients;
-
- /*! Instanced acceleration structure */
- struct Instance : public Geometry
- {
- ALIGNED_STRUCT_(16);
- static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;
-
- public:
- Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1);
- ~Instance();
-
- private:
- Instance (const Instance& other) DELETED; // do not implement
- Instance& operator= (const Instance& other) DELETED; // do not implement
-
- private:
- LBBox3fa nonlinearBounds(const BBox1f& time_range_in,
- const BBox1f& geom_time_range,
- float geom_time_segments) const;
-
- BBox3fa boundSegment(size_t itime,
- BBox3fa const& obbox0, BBox3fa const& obbox1,
- BBox3fa const& bbox0, BBox3fa const& bbox1,
- float t_min, float t_max) const;
-
- /* calculates the (correct) interpolated bounds */
- __forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(slerp(local2world[itime0], local2world[itime1], f),
- lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
- return xfmBounds(lerp(local2world[itime0], local2world[itime1], f),
- lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
- }
-
- public:
- virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
- virtual void setInstancedScene(const Ref<Scene>& scene) override;
- virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override;
- virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override;
- virtual AffineSpace3fa getTransform(float time) override;
- virtual void setMask (unsigned mask) override;
- virtual void build() {}
- virtual void addElementsToCount (GeometryCounts & counts) const override;
- virtual void commit() override;
-
- public:
-
- /*! calculates the bounds of instance */
- __forceinline BBox3fa bounds(size_t i) const {
- assert(i == 0);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object->bounds.bounds());
- return xfmBounds(local2world[0],object->bounds.bounds());
- }
-
- /*! gets the bounds of the instanced scene */
- __forceinline BBox3fa getObjectBounds(size_t itime) const {
- return object->getBounds(timeStep(itime));
- }
-
- /*! calculates the bounds of instance */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const {
- assert(i == 0);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),getObjectBounds(itime));
- return xfmBounds(local2world[itime],getObjectBounds(itime));
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
- assert(i == 0);
- LBBox3fa lbbox = nonlinearBounds(dt, time_range, fnumTimeSegments);
- return lbbox;
- }
-
- /*! calculates the build bounds of the i'th item, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- assert(i==0);
- const BBox3fa b = bounds(i);
- if (bbox) *bbox = b;
- return isvalid(b);
- }
-
- /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- assert(i==0);
- const LBBox3fa bounds = linearBounds(i,itime);
- bbox = bounds.bounds ();
- return isvalid(bounds);
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return numPrimitives;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return numPrimitives != otherVersion;
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- assert(i == 0);
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- if (!isvalid(bounds(i,itime))) return false;
-
- return true;
- }
-
- __forceinline AffineSpace3fa getLocal2World() const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return quaternionDecompositionToAffineSpace(local2world[0]);
- return local2world[0];
- }
-
- __forceinline AffineSpace3fa getLocal2World(float t) const
- {
- float ftime; const unsigned int itime = timeSegment(t, ftime);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return slerp(local2world[itime+0],local2world[itime+1],ftime);
- return lerp(local2world[itime+0],local2world[itime+1],ftime);
- }
-
- __forceinline AffineSpace3fa getWorld2Local() const {
- return world2local0;
- }
-
- __forceinline AffineSpace3fa getWorld2Local(float t) const {
- return rcp(getLocal2World(t));
- }
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return getWorld2LocalSlerp(valid, t);
- return getWorld2LocalLerp(valid, t);
- }
-
- private:
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const
- {
- vfloat<K> ftime;
- const vint<K> itime_k = timeSegment(t, ftime);
- assert(any(valid));
- const size_t index = bsf(movemask(valid));
- const int itime = itime_k[index];
- if (likely(all(valid, itime_k == vint<K>(itime)))) {
- return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]),
- AffineSpace3vff<K>(local2world[itime+1]),
- ftime));
- }
- else {
- AffineSpace3vff<K> space0,space1;
- vbool<K> valid1 = valid;
- while (any(valid1)) {
- vbool<K> valid2;
- const int itime = next_unique(valid1, itime_k, valid2);
- space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0);
- space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1);
- }
- return rcp(slerp(space0, space1, ftime));
- }
- }
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const
- {
- vfloat<K> ftime;
- const vint<K> itime_k = timeSegment(t, ftime);
- assert(any(valid));
- const size_t index = bsf(movemask(valid));
- const int itime = itime_k[index];
- if (likely(all(valid, itime_k == vint<K>(itime)))) {
- return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]),
- AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]),
- ftime));
- } else {
- AffineSpace3vf<K> space0,space1;
- vbool<K> valid1 = valid;
- while (any(valid1)) {
- vbool<K> valid2;
- const int itime = next_unique(valid1, itime_k, valid2);
- space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0);
- space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1);
- }
- return rcp(lerp(space0, space1, ftime));
- }
- }
-
- public:
- Accel* object; //!< pointer to instanced acceleration structure
- AffineSpace3ff* local2world; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
- AffineSpace3fa world2local0; //!< transformation from world space to local space for timestep 0
- };
-
- namespace isa
- {
- struct InstanceISA : public Instance
- {
- InstanceISA (Device* device)
- : Instance(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfo pinfo(empty);
- BBox3fa b = empty;
- if (!buildBounds(0,&b)) return pinfo;
- // const BBox3fa b = bounds(0);
- // if (!isvalid(b)) return pinfo;
-
- const PrimRef prim(b,geomID,unsigned(0));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfo pinfo(empty);
- BBox3fa b = empty;
- if (!buildBounds(0,&b)) return pinfo;
- // if (!valid(0,range<size_t>(itime))) return pinfo;
- // const PrimRef prim(linearBounds(0,itime).bounds(),geomID,unsigned(0));
- const PrimRef prim(b,geomID,unsigned(0));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfoMB pinfo(empty);
- if (!valid(0, timeSegmentRange(t0t1))) return pinfo;
- const PrimRefMB prim(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h b/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h
deleted file mode 100644
index c0f9ee8f77..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! represents an array of line segments */
- struct LineSegments : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2;
-
- public:
-
- /*! line segments construction */
- LineSegments (Device* device, Geometry::GType gtype);
-
- public:
- void setMask (unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify ();
- void interpolate(const RTCInterpolateArguments* const args);
- void setTessellationRate(float N);
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns the i'th segment */
- __forceinline const unsigned int& segment(size_t i) const {
- return segments[i];
- }
-
- /*! returns the segment to the left of the i'th segment */
- __forceinline bool segmentLeftExists(size_t i) const {
- assert (flags);
- return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0;
- }
-
- /*! returns the segment to the right of the i'th segment */
- __forceinline bool segmentRightExists(size_t i) const {
- assert (flags);
- return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0;
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w;
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const
- {
- const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1));
- return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w)));
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0);
- const Vec3ff v1 = vertex(index+1);
- return bounds(v0,v1);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0,itime);
- const Vec3ff v1 = vertex(index+1,itime);
- return bounds(v0,v1);
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0);
- const Vec3ff v1 = vertex(index+1);
- const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
- const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
- return bounds(w0,w1);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0,itime);
- const Vec3ff v1 = vertex(index+1,itime);
- const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
- const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
- return bounds(w0,w1);
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const unsigned int index = segment(i);
- if (index+1 >= numVertices()) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false;
- const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false;
- if (min(v0.w,v1.w) < 0.0f) return false;
- }
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
- {
- if (!valid(i,0)) return false;
- *bbox = bounds(i);
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- if (!valid(i,itime+0) || !valid(i,itime+1)) return false;
- bbox = bounds(i,itime); // use bounds of first time step in builder
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
- BufferView<unsigned int> segments; //!< array of line segment indices
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- BufferView<char> flags; //!< start, end flag per segment
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- int tessellationRate; //!< tessellation rate for bezier curve
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- namespace isa
- {
- struct LineSegmentsISA : public LineSegments
- {
- LineSegmentsISA (Device* device, Geometry::GType gtype)
- : LineSegments(device,gtype) {}
-
- Vec3fa computeDirection(unsigned int primID) const
- {
- const unsigned vtxID = segment(primID);
- const Vec3fa v0 = vertex(vtxID+0);
- const Vec3fa v1 = vertex(vtxID+1);
- return v1-v0;
- }
-
- Vec3fa computeDirection(unsigned int primID, size_t time) const
- {
- const unsigned vtxID = segment(primID);
- const Vec3fa v0 = vertex(vtxID+0,time);
- const Vec3fa v1 = vertex(vtxID+1,time);
- return v1-v0;
- }
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- BBox3fa vbounds(size_t i) const {
- return bounds(i);
- }
-
- BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const {
- return bounds(space,i);
- }
-
- LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
- return linearBounds(primID,time_range);
- }
-
- LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- return linearBounds(space,primID,time_range);
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_points.h b/thirdparty/embree-aarch64/kernels/common/scene_points.h
deleted file mode 100644
index 1d39ed07ba..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_points.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "buffer.h"
-#include "default.h"
-#include "geometry.h"
-
-namespace embree
-{
- /*! represents an array of points */
- struct Points : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS;
-
- public:
- /*! line segments construction */
- Points(Device* device, Geometry::GType gtype);
-
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps(unsigned int numTimeSteps);
- void setVertexAttributeCount(unsigned int N);
- void setBuffer(RTCBufferType type,
- unsigned int slot,
- RTCFormat format,
- const Ref<Buffer>& buffer,
- size_t offset,
- size_t stride,
- unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w;
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const Vec3ff& v0) const {
- return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w));
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Vec3ff v0 = vertex(i);
- return bounds(v0);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Vec3ff v0 = vertex(i, itime);
- return bounds(v0);
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
- {
- const Vec3ff v0 = vertex(i);
- const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
- return bounds(w0);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
- {
- const Vec3ff v0 = vertex(i, itime);
- const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
- return bounds(w0);
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const unsigned int index = (unsigned int)i;
- if (index >= numVertices())
- return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) {
- const Vec3ff v0 = vertex(index + 0, itime);
- if (unlikely(!isvalid4(v0)))
- return false;
- if (v0.w < 0.0f)
- return false;
- }
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
- {
- if (!valid(i, 0))
- return false;
- *bbox = bounds(i);
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- if (!valid(i, itime + 0) || !valid(i, itime + 1))
- return false;
- bbox = bounds(i, itime); // use bounds of first time step in builder
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- namespace isa
- {
- struct PointsISA : public Points
- {
- PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {}
-
- Vec3fa computeDirection(unsigned int primID) const
- {
- return Vec3fa(1, 0, 0);
- }
-
- Vec3fa computeDirection(unsigned int primID, size_t time) const
- {
- return Vec3fa(1, 0, 0);
- }
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- BBox3fa bounds = empty;
- if (!buildBounds(j, &bounds))
- continue;
- const PrimRef prim(bounds, geomID, unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- BBox3fa bounds = empty;
- if (!buildBounds(j, itime, bounds))
- continue;
- const PrimRef prim(bounds, geomID, unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims,
- const BBox1f& t0t1,
- const range<size_t>& r,
- size_t k,
- unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- if (!valid(j, timeSegmentRange(t0t1)))
- continue;
- const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- BBox3fa vbounds(size_t i) const
- {
- return bounds(i);
- }
-
- BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const
- {
- return bounds(space, i);
- }
-
- LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
- {
- return linearBounds(primID, time_range);
- }
-
- LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const
- {
- return linearBounds(space, primID, time_range);
- }
- };
- } // namespace isa
-
- DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType);
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h
deleted file mode 100644
index d5bb054b14..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Quad Mesh */
- struct QuadMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH;
-
- /*! triangle indices */
- struct Quad
- {
- uint32_t v[4];
-
- /*! outputs triangle indices */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) {
- return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }";
- }
- };
-
- public:
-
- /*! quad mesh construction */
- QuadMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th quad */
- __forceinline const Quad& quad(size_t i) const {
- return quads[i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! calculates the bounds of the i'th quad */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0]);
- const Vec3fa v1 = vertex(q.v[1]);
- const Vec3fa v2 = vertex(q.v[2]);
- const Vec3fa v3 = vertex(q.v[3]);
- return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
- }
-
- /*! calculates the bounds of the i'th quad at the itime'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0],itime);
- const Vec3fa v1 = vertex(q.v[1],itime);
- const Vec3fa v2 = vertex(q.v[2],itime);
- const Vec3fa v3 = vertex(q.v[3],itime);
- return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const Quad& q = quad(i);
- if (unlikely(q.v[0] >= numVertices())) return false;
- if (unlikely(q.v[1] >= numVertices())) return false;
- if (unlikely(q.v[2] >= numVertices())) return false;
- if (unlikely(q.v[3] >= numVertices())) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- if (!isvalid(vertex(q.v[0],itime))) return false;
- if (!isvalid(vertex(q.v[1],itime))) return false;
- if (!isvalid(vertex(q.v[2],itime))) return false;
- if (!isvalid(vertex(q.v[3],itime))) return false;
- }
-
- return true;
- }
-
- /*! calculates the linear bounds of the i'th quad at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const Quad& q = quad(i);
- if (q.v[0] >= numVertices()) return false;
- if (q.v[1] >= numVertices()) return false;
- if (q.v[2] >= numVertices()) return false;
- if (q.v[3] >= numVertices()) return false;
-
- for (unsigned int t=0; t<numTimeSteps; t++)
- {
- const Vec3fa v0 = vertex(q.v[0],t);
- const Vec3fa v1 = vertex(q.v[1],t);
- const Vec3fa v2 = vertex(q.v[2],t);
- const Vec3fa v3 = vertex(q.v[3],t);
-
- if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)))
- return false;
- }
-
- if (bbox)
- *bbox = bounds(i);
-
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const Quad& q = quad(i);
- if (unlikely(q.v[0] >= numVertices())) return false;
- if (unlikely(q.v[1] >= numVertices())) return false;
- if (unlikely(q.v[2] >= numVertices())) return false;
- if (unlikely(q.v[3] >= numVertices())) return false;
-
- assert(itime+1 < numTimeSteps);
- const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
- const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
- const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
- const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false;
- const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
- const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
- const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
- const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false;
-
- /* use bounds of first time step in builder */
- bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3));
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(dt))) return false;
- bbox = linearBounds(i, dt);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return quads.modCounter;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return quads.isModified(otherVersion); // || numPrimitivesChanged;
- }
-
- /* returns the projected area */
- __forceinline float projectedPrimitiveArea(const size_t i) const {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0]);
- const Vec3fa v1 = vertex(q.v[1]);
- const Vec3fa v2 = vertex(q.v[2]);
- const Vec3fa v3 = vertex(q.v[3]);
- return areaProjectedTriangle(v0,v1,v3) +
- areaProjectedTriangle(v1,v2,v3);
- }
-
- public:
- BufferView<Quad> quads; //!< array of quads
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< vertex attribute buffers
- };
-
- namespace isa
- {
- struct QuadMeshISA : public QuadMesh
- {
- QuadMeshISA (Device* device)
- : QuadMesh(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h
deleted file mode 100644
index d0246009db..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h
+++ /dev/null
@@ -1,326 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-#include "../subdiv/half_edge.h"
-#include "../subdiv/tessellation_cache.h"
-#include "../subdiv/catmullclark_coefficients.h"
-#include "../subdiv/patch.h"
-#include "../../common/algorithms/parallel_map.h"
-#include "../../common/algorithms/parallel_set.h"
-
-namespace embree
-{
- class SubdivMesh : public Geometry
- {
- ALIGNED_CLASS_(16);
- public:
-
- typedef HalfEdge::Edge Edge;
-
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH;
-
- /*! structure used to sort half edges using radix sort by their key */
- struct KeyHalfEdge
- {
- KeyHalfEdge() {}
-
- KeyHalfEdge (uint64_t key, HalfEdge* edge)
- : key(key), edge(edge) {}
-
- __forceinline operator uint64_t() const {
- return key;
- }
-
- friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) {
- return e0.key < e1.key;
- }
-
- public:
- uint64_t key;
- HalfEdge* edge;
- };
-
- public:
-
- /*! subdiv mesh construction */
- SubdivMesh(Device* device);
-
- public:
- void setMask (unsigned mask);
- void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode);
- void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setTopologyCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void setTessellationRate(float N);
- bool verify();
- void commit();
- void addElementsToCount (GeometryCounts & counts) const;
- void setDisplacementFunction (RTCDisplacementFunctionN func);
- unsigned int getFirstHalfEdge(unsigned int faceID);
- unsigned int getFace(unsigned int edgeID);
- unsigned int getNextHalfEdge(unsigned int edgeID);
- unsigned int getPreviousHalfEdge(unsigned int edgeID);
- unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID);
-
- public:
-
- /*! return the number of faces */
- size_t numFaces() const {
- return faceVertices.size();
- }
-
- /*! return the number of edges */
- size_t numEdges() const {
- return topology[0].vertexIndices.size();
- }
-
- /*! return the number of vertices */
- size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! calculates the bounds of the i'th subdivision patch at the j'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t j = 0) const {
- return topology[0].getHalfEdge(i)->bounds(vertices[j]);
- }
-
- /*! check if the i'th primitive is valid */
- __forceinline bool valid(size_t i) const {
- return topology[0].valid(i) && !invalidFace(i);
- }
-
- /*! check if the i'th primitive is valid for the j'th time range */
- __forceinline bool valid(size_t i, size_t j) const {
- return topology[0].valid(i) && !invalidFace(i,j);
- }
-
- /*! prints some statistics */
- void printStatistics();
-
- /*! initializes the half edge data structure */
- void initializeHalfEdgeStructures ();
-
- public:
-
- /*! returns the vertex buffer for some time step */
- __forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const {
- return vertices[t];
- }
-
- /* returns tessellation level of edge */
- __forceinline float getEdgeLevel(const size_t i) const
- {
- if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level?
- else return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level?
- }
-
- public:
- RTCDisplacementFunctionN displFunc; //!< displacement function
-
- /*! all buffers in this section are provided by the application */
- public:
-
- /*! the topology contains all data that may differ when
- * interpolating different user data buffers */
- struct Topology
- {
- public:
-
- /*! Default topology construction */
- Topology () : halfEdges(nullptr,0) {}
-
- /*! Topology initialization */
- Topology (SubdivMesh* mesh);
-
- /*! make the class movable */
- public:
- Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
- : mesh(std::move(other.mesh)),
- vertexIndices(std::move(other.vertexIndices)),
- subdiv_mode(std::move(other.subdiv_mode)),
- halfEdges(std::move(other.halfEdges)),
- halfEdges0(std::move(other.halfEdges0)),
- halfEdges1(std::move(other.halfEdges1)) {}
-
- Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
- {
- mesh = std::move(other.mesh);
- vertexIndices = std::move(other.vertexIndices);
- subdiv_mode = std::move(other.subdiv_mode);
- halfEdges = std::move(other.halfEdges);
- halfEdges0 = std::move(other.halfEdges0);
- halfEdges1 = std::move(other.halfEdges1);
- return *this;
- }
-
- public:
- /*! check if the i'th primitive is valid in this topology */
- __forceinline bool valid(size_t i) const
- {
- if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) {
- if (getHalfEdge(i)->faceHasBorder()) return false;
- }
- return true;
- }
-
- /*! updates the interpolation mode for the topology */
- void setSubdivisionMode (RTCSubdivisionMode mode);
-
- /*! marks all buffers as modified */
- void update ();
-
- /*! verifies index array */
- bool verify (size_t numVertices);
-
- /*! initializes the half edge data structure */
- void initializeHalfEdgeStructures ();
-
- private:
-
- /*! recalculates the half edges */
- void calculateHalfEdges();
-
- /*! updates half edges when recalculation is not necessary */
- void updateHalfEdges();
-
- /*! user input data */
- public:
-
- SubdivMesh* mesh;
-
- /*! indices of the vertices composing each face */
- BufferView<unsigned int> vertexIndices;
-
- /*! subdiv interpolation mode */
- RTCSubdivisionMode subdiv_mode;
-
- /*! generated data */
- public:
-
- /*! returns the start half edge for face f */
- __forceinline const HalfEdge* getHalfEdge ( const size_t f ) const {
- return &halfEdges[mesh->faceStartEdge[f]];
- }
-
- /*! Half edge structure, generated by initHalfEdgeStructures */
- mvector<HalfEdge> halfEdges;
-
- /*! the following data is only required during construction of the
- * half edge structure and can be cleared for static scenes */
- private:
-
- /*! two arrays used to sort the half edges */
- std::vector<KeyHalfEdge> halfEdges0;
- std::vector<KeyHalfEdge> halfEdges1;
- };
-
- /*! returns the start half edge for topology t and face f */
- __forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const {
- return topology[t].getHalfEdge(f);
- }
-
- /*! buffer containing the number of vertices for each face */
- BufferView<unsigned int> faceVertices;
-
- /*! array of topologies */
- vector<Topology> topology;
-
- /*! vertex buffer (one buffer for each time step) */
- vector<BufferView<Vec3fa>> vertices;
-
- /*! user data buffers */
- vector<RawBufferView> vertexAttribs;
-
- /*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */
- BufferView<Edge> edge_creases;
-
- /*! edge crease weights for each edge of the edge_creases buffer */
- BufferView<float> edge_crease_weights;
-
- /*! vertex crease buffer containing all vertices that carry vertex crease weights */
- BufferView<unsigned int> vertex_creases;
-
- /*! vertex crease weights for each vertex of the vertex_creases buffer */
- BufferView<float> vertex_crease_weights;
-
- /*! subdivision level for each half edge of the vertexIndices buffer */
- BufferView<float> levels;
- float tessellationRate; // constant rate that is used when levels is not set
-
- /*! buffer that marks specific faces as holes */
- BufferView<unsigned> holes;
-
- /*! all data in this section is generated by initializeHalfEdgeStructures function */
- private:
-
- /*! number of half edges used by faces */
- size_t numHalfEdges;
-
- /*! fast lookup table to find the first half edge for some face */
- mvector<uint32_t> faceStartEdge;
-
- /*! fast lookup table to find the face for some half edge */
- mvector<uint32_t> halfEdgeFace;
-
- /*! set with all holes */
- parallel_set<uint32_t> holeSet;
-
- /*! fast lookup table to detect invalid faces */
- mvector<int8_t> invalid_face;
-
- /*! test if face i is invalid in timestep j */
- __forceinline int8_t& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; }
- __forceinline const int8_t& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; }
-
- /*! interpolation cache */
- public:
- static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; }
- static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; }
- static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) {
- const size_t slots = numInterpolationSlots4(stride);
- assert(slot < slots);
- return slots*prim+slot;
- }
- std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags;
- std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags;
- std::vector<Patch3fa::Ref> patch_eval_trees;
-
- /*! the following data is only required during construction of the
- * half edge structure and can be cleared for static scenes */
- private:
-
- /*! map with all vertex creases */
- parallel_map<uint32_t,float> vertexCreaseMap;
-
- /*! map with all edge creases */
- parallel_map<uint64_t,float> edgeCreaseMap;
-
- protected:
-
- /*! counts number of geometry commits */
- size_t commitCounter;
- };
-
- namespace isa
- {
- struct SubdivMeshISA : public SubdivMesh
- {
- SubdivMeshISA (Device* device)
- : SubdivMesh(device) {}
-
- void interpolate(const RTCInterpolateArguments* const args);
- void interpolateN(const RTCInterpolateNArguments* const args);
- };
- }
-
- DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*);
-};
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp
deleted file mode 100644
index d1c2750f14..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "scene_triangle_mesh.h"
-#include "scene.h"
-
-namespace embree
-{
-#if defined(EMBREE_LOWEST_ISA)
-
- TriangleMesh::TriangleMesh (Device* device)
- : Geometry(device,GTY_TRIANGLE_MESH,0,1)
- {
- vertices.resize(numTimeSteps);
- }
-
- void TriangleMesh::setMask (unsigned mask)
- {
- this->mask = mask;
- Geometry::update();
- }
-
- void TriangleMesh::setNumTimeSteps (unsigned int numTimeSteps)
- {
- vertices.resize(numTimeSteps);
- Geometry::setNumTimeSteps(numTimeSteps);
- }
-
- void TriangleMesh::setVertexAttributeCount (unsigned int N)
- {
- vertexAttribs.resize(N);
- Geometry::update();
- }
-
- void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num)
- {
- /* verify that all accesses are 4 bytes aligned */
- if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned");
-
- if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (format != RTC_FORMAT_FLOAT3)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex buffer format");
-
- /* if buffer is larger than 16GB the premultiplied index optimization does not work */
- if (stride*num > 16ll*1024ll*1024ll*1024ll)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "vertex buffer can be at most 16GB large");
-
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid vertex buffer slot");
-
- vertices[slot].set(buffer, offset, stride, num, format);
- vertices[slot].checkPadding16();
- vertices0 = vertices[0];
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (format < RTC_FORMAT_FLOAT || format > RTC_FORMAT_FLOAT16)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer format");
-
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer slot");
-
- vertexAttribs[slot].set(buffer, offset, stride, num, format);
- vertexAttribs[slot].checkPadding16();
- }
- else if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- if (format != RTC_FORMAT_UINT3)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid index buffer format");
-
- triangles.set(buffer, offset, stride, num, format);
- setNumPrimitives(num);
- }
- else
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- }
-
- void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot)
- {
- if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return triangles.getPtr();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return vertices[slot].getPtr();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return vertexAttribs[slot].getPtr();
- }
- else
- {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- return nullptr;
- }
- }
-
- void TriangleMesh::updateBuffer(RTCBufferType type, unsigned int slot)
- {
- if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- triangles.setModified();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- vertices[slot].setModified();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- vertexAttribs[slot].setModified();
- }
- else
- {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- }
-
- Geometry::update();
- }
-
- void TriangleMesh::commit()
- {
- /* verify that stride of all time steps are identical */
- for (unsigned int t=0; t<numTimeSteps; t++)
- if (vertices[t].getStride() != vertices[0].getStride())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step");
-
- Geometry::commit();
- }
-
- void TriangleMesh::addElementsToCount (GeometryCounts & counts) const
- {
- if (numTimeSteps == 1) counts.numTriangles += numPrimitives;
- else counts.numMBTriangles += numPrimitives;
- }
-
- bool TriangleMesh::verify()
- {
- /*! verify size of vertex arrays */
- if (vertices.size() == 0) return false;
- for (const auto& buffer : vertices)
- if (buffer.size() != numVertices())
- return false;
-
- /*! verify size of user vertex arrays */
- for (const auto& buffer : vertexAttribs)
- if (buffer.size() != numVertices())
- return false;
-
- /*! verify triangle indices */
- for (size_t i=0; i<size(); i++) {
- if (triangles[i].v[0] >= numVertices()) return false;
- if (triangles[i].v[1] >= numVertices()) return false;
- if (triangles[i].v[2] >= numVertices()) return false;
- }
-
- /*! verify vertices */
- for (const auto& buffer : vertices)
- for (size_t i=0; i<buffer.size(); i++)
- if (!isvalid(buffer[i]))
- return false;
-
- return true;
- }
-
- void TriangleMesh::interpolate(const RTCInterpolateArguments* const args)
- {
- unsigned int primID = args->primID;
- float u = args->u;
- float v = args->v;
- RTCBufferType bufferType = args->bufferType;
- unsigned int bufferSlot = args->bufferSlot;
- float* P = args->P;
- float* dPdu = args->dPdu;
- float* dPdv = args->dPdv;
- float* ddPdudu = args->ddPdudu;
- float* ddPdvdv = args->ddPdvdv;
- float* ddPdudv = args->ddPdudv;
- unsigned int valueCount = args->valueCount;
-
- /* calculate base pointer and stride */
- assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
- (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
- const char* src = nullptr;
- size_t stride = 0;
- if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
- src = vertexAttribs[bufferSlot].getPtr();
- stride = vertexAttribs[bufferSlot].getStride();
- } else {
- src = vertices[bufferSlot].getPtr();
- stride = vertices[bufferSlot].getStride();
- }
-
- for (unsigned int i=0; i<valueCount; i+=4)
- {
- size_t ofs = i*sizeof(float);
- const float w = 1.0f-u-v;
- const Triangle& tri = triangle(primID);
- const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount));
- const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
- const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
- const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);
-
- if (P) {
- vfloat4::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2)));
- }
- if (dPdu) {
- assert(dPdu); vfloat4::storeu(valid,dPdu+i,p1-p0);
- assert(dPdv); vfloat4::storeu(valid,dPdv+i,p2-p0);
- }
- if (ddPdudu) {
- assert(ddPdudu); vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero));
- assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero));
- assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero));
- }
- }
- }
-
-#endif
-
- namespace isa
- {
- TriangleMesh* createTriangleMesh(Device* device) {
- return new TriangleMeshISA(device);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h
deleted file mode 100644
index eaf2e1799a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Triangle Mesh */
- struct TriangleMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_TRIANGLE_MESH;
-
- /*! triangle indices */
- struct Triangle
- {
- uint32_t v[3];
-
- /*! outputs triangle indices */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Triangle& t) {
- return cout << "Triangle { " << t.v[0] << ", " << t.v[1] << ", " << t.v[2] << " }";
- }
- };
-
- public:
-
- /*! triangle mesh construction */
- TriangleMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th triangle*/
- __forceinline const Triangle& triangle(size_t i) const {
- return triangles[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const Vec3fa vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! calculates the bounds of the i'th triangle */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0]);
- const Vec3fa v1 = vertex(tri.v[1]);
- const Vec3fa v2 = vertex(tri.v[2]);
- return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
- }
-
- /*! calculates the bounds of the i'th triangle at the itime'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0],itime);
- const Vec3fa v1 = vertex(tri.v[1],itime);
- const Vec3fa v2 = vertex(tri.v[2],itime);
- return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- if (!isvalid(vertex(tri.v[0],itime))) return false;
- if (!isvalid(vertex(tri.v[1],itime))) return false;
- if (!isvalid(vertex(tri.v[2],itime))) return false;
- }
-
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- for (size_t t=0; t<numTimeSteps; t++)
- {
- const Vec3fa v0 = vertex(tri.v[0],t);
- const Vec3fa v1 = vertex(tri.v[1],t);
- const Vec3fa v2 = vertex(tri.v[2],t);
- if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2)))
- return false;
- }
-
- if (likely(bbox))
- *bbox = bounds(i);
-
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- assert(itime+1 < numTimeSteps);
- const Vec3fa a0 = vertex(tri.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
- const Vec3fa a1 = vertex(tri.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
- const Vec3fa a2 = vertex(tri.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
- const Vec3fa b0 = vertex(tri.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
- const Vec3fa b1 = vertex(tri.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
- const Vec3fa b2 = vertex(tri.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
-
- /* use bounds of first time step in builder */
- bbox = BBox3fa(min(a0,a1,a2),max(a0,a1,a2));
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const {
- if (!valid(i, timeSegmentRange(dt))) return false;
- bbox = linearBounds(i, dt);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return triangles.modCounter;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return triangles.isModified(otherVersion); // || numPrimitivesChanged;
- }
-
- /* returns the projected area */
- __forceinline float projectedPrimitiveArea(const size_t i) const {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0]);
- const Vec3fa v1 = vertex(tri.v[1]);
- const Vec3fa v2 = vertex(tri.v[2]);
- return areaProjectedTriangle(v0,v1,v2);
- }
-
- public:
- BufferView<Triangle> triangles; //!< array of triangles
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<RawBufferView> vertexAttribs; //!< vertex attributes
- };
-
- namespace isa
- {
- struct TriangleMeshISA : public TriangleMesh
- {
- TriangleMeshISA (Device* device)
- : TriangleMesh(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(TriangleMesh*, createTriangleMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h b/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h
deleted file mode 100644
index 8d11ed6986..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accelset.h"
-
-namespace embree
-{
- /*! User geometry with user defined intersection functions */
- struct UserGeometry : public AccelSet
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_USER_GEOMETRY;
-
- public:
- UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1);
- virtual void setMask (unsigned mask);
- virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr);
- virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect);
- virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded);
- virtual void build() {}
- virtual void addElementsToCount (GeometryCounts & counts) const;
- };
-
- namespace isa
- {
- struct UserGeometryISA : public UserGeometry
- {
- UserGeometryISA (Device* device)
- : UserGeometry(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(UserGeometry*, createUserGeometry, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stack_item.h b/thirdparty/embree-aarch64/kernels/common/stack_item.h
deleted file mode 100644
index 533c385365..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stack_item.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! An item on the stack holds the node ID and distance of that node. */
- template<typename T>
- struct __aligned(16) StackItemT
- {
- /*! assert that the xchg function works */
- static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
-
- __forceinline StackItemT() {}
-
- __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
-
- /*! use SSE instructions to swap stack items */
- __forceinline static void xchg(StackItemT& a, StackItemT& b)
- {
- const vfloat4 sse_a = vfloat4::load((float*)&a);
- const vfloat4 sse_b = vfloat4::load((float*)&b);
- vfloat4::store(&a,sse_b);
- vfloat4::store(&b,sse_a);
- }
-
- /*! Sort 2 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 3 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s3.dist < s2.dist) xchg(s3,s2);
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s4.dist < s3.dist) xchg(s4,s3);
- if (s3.dist < s1.dist) xchg(s3,s1);
- if (s4.dist < s2.dist) xchg(s4,s2);
- if (s3.dist < s2.dist) xchg(s3,s2);
- }
-
- /*! use SSE instructions to swap stack items */
- __forceinline static void cmp_xchg(vint4& a, vint4& b)
- {
-#if defined(__AVX512VL__)
- const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
-#else
- const vboolf4 mask0(b < a);
- const vboolf4 mask(shuffle<2,2,2,2>(mask0));
-#endif
- const vint4 c = select(mask,b,a);
- const vint4 d = select(mask,a,b);
- a = c;
- b = d;
- }
-
- /*! Sort 3 stack items. */
- __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s3,s2);
- cmp_xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s4,s3);
- cmp_xchg(s3,s1);
- cmp_xchg(s4,s2);
- cmp_xchg(s3,s2);
- }
-
-
- /*! Sort N stack items. */
- __forceinline friend void sort(StackItemT* begin, StackItemT* end)
- {
- for (StackItemT* i = begin+1; i != end; ++i)
- {
- const vfloat4 item = vfloat4::load((float*)i);
- const unsigned dist = i->dist;
- StackItemT* j = i;
-
- while ((j != begin) && ((j-1)->dist < dist))
- {
- vfloat4::store(j, vfloat4::load((float*)(j-1)));
- --j;
- }
-
- vfloat4::store(j, item);
- }
- }
-
- public:
- T ptr;
- unsigned dist;
- };
-
- /*! An item on the stack holds the node ID and active ray mask. */
- template<typename T>
- struct __aligned(8) StackItemMaskT
- {
- T ptr;
- size_t mask;
- };
-
- struct __aligned(8) StackItemMaskCoherent
- {
- size_t mask;
- size_t parent;
- size_t child;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stat.cpp b/thirdparty/embree-aarch64/kernels/common/stat.cpp
deleted file mode 100644
index b73c3a8c76..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stat.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "stat.h"
-
-namespace embree
-{
- Stat Stat::instance;
-
- Stat::Stat () {
- }
-
- Stat::~Stat ()
- {
-#ifdef EMBREE_STAT_COUNTERS
- Stat::print(std::cout);
-#endif
- }
-
- void Stat::print(std::ostream& cout)
- {
- Counters& cntrs = instance.cntrs;
- Counters::Data& data = instance.cntrs.code;
- //Counters::Data& data = instance.cntrs.active;
-
- /* print absolute numbers */
- cout << "--------- ABSOLUTE ---------" << std::endl;
- cout << " #normal_travs = " << float(data.normal.travs )*1E-6 << "M" << std::endl;
- cout << " #nodes = " << float(data.normal.trav_nodes )*1E-6 << "M" << std::endl;
- cout << " #nodes_xfm = " << float(data.normal.trav_xfm_nodes )*1E-6 << "M" << std::endl;
- cout << " #leaves = " << float(data.normal.trav_leaves )*1E-6 << "M" << std::endl;
- cout << " #prims = " << float(data.normal.trav_prims )*1E-6 << "M" << std::endl;
- cout << " #prim_hits = " << float(data.normal.trav_prim_hits )*1E-6 << "M" << std::endl;
-
- cout << " #stack nodes = " << float(data.normal.trav_stack_nodes )*1E-6 << "M" << std::endl;
- cout << " #stack pop = " << float(data.normal.trav_stack_pop )*1E-6 << "M" << std::endl;
-
- size_t normal_box_hits = 0;
- size_t weighted_box_hits = 0;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
- normal_box_hits += data.normal.trav_hit_boxes[i];
- weighted_box_hits += data.normal.trav_hit_boxes[i]*i;
- }
- cout << " #hit_boxes = " << normal_box_hits << " (total) distribution: ";
- float average = 0.0f;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++)
- {
- float value = 100.0f * data.normal.trav_hit_boxes[i] / normal_box_hits;
- cout << "[" << i << "] " << value << " ";
- average += (float)i*data.normal.trav_hit_boxes[i] / normal_box_hits;
- }
- cout << " average = " << average << std::endl;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.normal.trav_hit_boxes[i]*i / weighted_box_hits << " ";
- cout << std::endl;
-
- if (data.shadow.travs) {
- cout << " #shadow_travs = " << float(data.shadow.travs )*1E-6 << "M" << std::endl;
- cout << " #nodes = " << float(data.shadow.trav_nodes )*1E-6 << "M" << std::endl;
- cout << " #nodes_xfm = " << float(data.shadow.trav_xfm_nodes)*1E-6 << "M" << std::endl;
- cout << " #leaves = " << float(data.shadow.trav_leaves )*1E-6 << "M" << std::endl;
- cout << " #prims = " << float(data.shadow.trav_prims )*1E-6 << "M" << std::endl;
- cout << " #prim_hits = " << float(data.shadow.trav_prim_hits)*1E-6 << "M" << std::endl;
-
- cout << " #stack nodes = " << float(data.shadow.trav_stack_nodes )*1E-6 << "M" << std::endl;
- cout << " #stack pop = " << float(data.shadow.trav_stack_pop )*1E-6 << "M" << std::endl;
-
- size_t shadow_box_hits = 0;
- size_t weighted_shadow_box_hits = 0;
-
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
- shadow_box_hits += data.shadow.trav_hit_boxes[i];
- weighted_shadow_box_hits += data.shadow.trav_hit_boxes[i]*i;
- }
- cout << " #hit_boxes = ";
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i] / shadow_box_hits << " ";
- cout << std::endl;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i]*i / weighted_shadow_box_hits << " ";
- cout << std::endl;
- }
- cout << std::endl;
-
- /* print per traversal numbers */
- cout << "--------- PER TRAVERSAL ---------" << std::endl;
- float active_normal_travs = float(cntrs.active.normal.travs )/float(cntrs.all.normal.travs );
- float active_normal_trav_nodes = float(cntrs.active.normal.trav_nodes )/float(cntrs.all.normal.trav_nodes );
- float active_normal_trav_xfm_nodes = float(cntrs.active.normal.trav_xfm_nodes )/float(cntrs.all.normal.trav_xfm_nodes );
- float active_normal_trav_leaves = float(cntrs.active.normal.trav_leaves)/float(cntrs.all.normal.trav_leaves);
- float active_normal_trav_prims = float(cntrs.active.normal.trav_prims )/float(cntrs.all.normal.trav_prims );
- float active_normal_trav_prim_hits = float(cntrs.active.normal.trav_prim_hits )/float(cntrs.all.normal.trav_prim_hits );
- float active_normal_trav_stack_pop = float(cntrs.active.normal.trav_stack_pop )/float(cntrs.all.normal.trav_stack_pop );
-
- cout << " #normal_travs = " << float(cntrs.code.normal.travs )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_travs << "% active" << std::endl;
- cout << " #nodes = " << float(cntrs.code.normal.trav_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_nodes << "% active" << std::endl;
- cout << " #node_xfm = " << float(cntrs.code.normal.trav_xfm_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_xfm_nodes << "% active" << std::endl;
- cout << " #leaves = " << float(cntrs.code.normal.trav_leaves)/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_leaves << "% active" << std::endl;
- cout << " #prims = " << float(cntrs.code.normal.trav_prims )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prims << "% active" << std::endl;
- cout << " #prim_hits = " << float(cntrs.code.normal.trav_prim_hits )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prim_hits << "% active" << std::endl;
- cout << " #stack_pop = " << float(cntrs.code.normal.trav_stack_pop )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_stack_pop << "% active" << std::endl;
-
- if (cntrs.all.shadow.travs) {
- float active_shadow_travs = float(cntrs.active.shadow.travs )/float(cntrs.all.shadow.travs );
- float active_shadow_trav_nodes = float(cntrs.active.shadow.trav_nodes )/float(cntrs.all.shadow.trav_nodes );
- float active_shadow_trav_xfm_nodes = float(cntrs.active.shadow.trav_xfm_nodes )/float(cntrs.all.shadow.trav_xfm_nodes );
- float active_shadow_trav_leaves = float(cntrs.active.shadow.trav_leaves)/float(cntrs.all.shadow.trav_leaves);
- float active_shadow_trav_prims = float(cntrs.active.shadow.trav_prims )/float(cntrs.all.shadow.trav_prims );
- float active_shadow_trav_prim_hits = float(cntrs.active.shadow.trav_prim_hits )/float(cntrs.all.shadow.trav_prim_hits );
-
- cout << " #shadow_travs = " << float(cntrs.code.shadow.travs )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_travs << "% active" << std::endl;
- cout << " #nodes = " << float(cntrs.code.shadow.trav_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_nodes << "% active" << std::endl;
- cout << " #nodes_xfm = " << float(cntrs.code.shadow.trav_xfm_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_xfm_nodes << "% active" << std::endl;
- cout << " #leaves = " << float(cntrs.code.shadow.trav_leaves)/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_leaves << "% active" << std::endl;
- cout << " #prims = " << float(cntrs.code.shadow.trav_prims )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prims << "% active" << std::endl;
- cout << " #prim_hits = " << float(cntrs.code.shadow.trav_prim_hits )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prim_hits << "% active" << std::endl;
-
- }
- cout << std::endl;
-
- /* print user counters for performance tuning */
- cout << "--------- USER ---------" << std::endl;
- for (size_t i=0; i<10; i++)
- cout << "#user" << i << " = " << float(cntrs.user[i])/float(cntrs.all.normal.travs+cntrs.all.shadow.travs) << " per traversal" << std::endl;
-
- cout << "#user5/user3 " << 100.0f*float(cntrs.user[5])/float(cntrs.user[3]) << "%" << std::endl;
- cout << "#user6/user3 " << 100.0f*float(cntrs.user[6])/float(cntrs.user[3]) << "%" << std::endl;
- cout << "#user7/user3 " << 100.0f*float(cntrs.user[7])/float(cntrs.user[3]) << "%" << std::endl;
- cout << std::endl;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stat.h b/thirdparty/embree-aarch64/kernels/common/stat.h
deleted file mode 100644
index 3cda2bd014..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stat.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-/* Macros to gather statistics */
-#ifdef EMBREE_STAT_COUNTERS
-# define STAT(x) x
-# define STAT3(s,x,y,z) \
- STAT(Stat::get().code .s+=x); \
- STAT(Stat::get().active.s+=y); \
- STAT(Stat::get().all .s+=z);
-# define STAT_USER(i,x) Stat::get().user[i]+=x;
-#else
-# define STAT(x)
-# define STAT3(s,x,y,z)
-# define STAT_USER(i,x)
-#endif
-
-namespace embree
-{
- /*! Gathers ray tracing statistics. We count 1) how often a code
- * location is reached, 2) how many SIMD lanes are active, 3) how
- * many SIMD lanes reach the code location */
- class Stat
- {
- public:
-
- static const size_t SIZE_HISTOGRAM = 64+1;
-
- /*! constructs stat counter class */
- Stat ();
-
- /*! destructs stat counter class */
- ~Stat ();
-
- class Counters
- {
- public:
- Counters () {
- clear();
- }
-
- void clear()
- {
- all.clear();
- active.clear();
- code.clear();
- for (auto& u : user) u.store(0);
- }
-
- public:
-
- /* per packet and per ray stastics */
- struct Data
- {
- void clear () {
- normal.clear();
- shadow.clear();
- point_query.clear();
- }
-
- /* normal and shadow ray statistics */
- struct
- {
- void clear()
- {
- travs.store(0);
- trav_nodes.store(0);
- trav_leaves.store(0);
- trav_prims.store(0);
- trav_prim_hits.store(0);
- for (auto& v : trav_hit_boxes) v.store(0);
- trav_stack_pop.store(0);
- trav_stack_nodes.store(0);
- trav_xfm_nodes.store(0);
- }
-
- public:
- std::atomic<size_t> travs;
- std::atomic<size_t> trav_nodes;
- std::atomic<size_t> trav_leaves;
- std::atomic<size_t> trav_prims;
- std::atomic<size_t> trav_prim_hits;
- std::atomic<size_t> trav_hit_boxes[SIZE_HISTOGRAM+1];
- std::atomic<size_t> trav_stack_pop;
- std::atomic<size_t> trav_stack_nodes;
- std::atomic<size_t> trav_xfm_nodes;
-
- } normal, shadow, point_query;
- } all, active, code;
-
- std::atomic<size_t> user[10];
- };
-
- public:
-
- static __forceinline Counters& get() {
- return instance.cntrs;
- }
-
- static void clear() {
- instance.cntrs.clear();
- }
-
- static void print(embree_ostream cout);
-
- private:
- Counters cntrs;
-
- private:
- static Stat instance;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/state.cpp b/thirdparty/embree-aarch64/kernels/common/state.cpp
deleted file mode 100644
index 51fc9b7826..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/state.cpp
+++ /dev/null
@@ -1,543 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "state.h"
-#include "../../common/lexers/streamfilters.h"
-
-namespace embree
-{
- MutexSys g_printMutex;
-
- State::ErrorHandler State::g_errorHandler;
-
- State::ErrorHandler::ErrorHandler()
- : thread_error(createTls()) {}
-
- State::ErrorHandler::~ErrorHandler()
- {
- Lock<MutexSys> lock(errors_mutex);
- for (size_t i=0; i<thread_errors.size(); i++)
- delete thread_errors[i];
- destroyTls(thread_error);
- thread_errors.clear();
- }
-
- RTCError* State::ErrorHandler::error()
- {
- RTCError* stored_error = (RTCError*) getTls(thread_error);
- if (stored_error) return stored_error;
-
- Lock<MutexSys> lock(errors_mutex);
- stored_error = new RTCError(RTC_ERROR_NONE);
- thread_errors.push_back(stored_error);
- setTls(thread_error,stored_error);
- return stored_error;
- }
-
- State::State ()
- : enabled_cpu_features(getCPUFeatures()),
- enabled_builder_cpu_features(enabled_cpu_features),
- frequency_level(FREQUENCY_SIMD256)
- {
- tri_accel = "default";
- tri_builder = "default";
- tri_traverser = "default";
-
- tri_accel_mb = "default";
- tri_builder_mb = "default";
- tri_traverser_mb = "default";
-
- quad_accel = "default";
- quad_builder = "default";
- quad_traverser = "default";
-
- quad_accel_mb = "default";
- quad_builder_mb = "default";
- quad_traverser_mb = "default";
-
- line_accel = "default";
- line_builder = "default";
- line_traverser = "default";
-
- line_accel_mb = "default";
- line_builder_mb = "default";
- line_traverser_mb = "default";
-
- hair_accel = "default";
- hair_builder = "default";
- hair_traverser = "default";
-
- hair_accel_mb = "default";
- hair_builder_mb = "default";
- hair_traverser_mb = "default";
-
- object_accel = "default";
- object_builder = "default";
- object_accel_min_leaf_size = 1;
- object_accel_max_leaf_size = 1;
-
- object_accel_mb = "default";
- object_builder_mb = "default";
- object_accel_mb_min_leaf_size = 1;
- object_accel_mb_max_leaf_size = 1;
-
- max_spatial_split_replications = 1.2f;
- useSpatialPreSplits = false;
-
- tessellation_cache_size = 128*1024*1024;
-
- subdiv_accel = "default";
- subdiv_accel_mb = "default";
-
- grid_accel = "default";
- grid_builder = "default";
- grid_accel_mb = "default";
- grid_builder_mb = "default";
-
- instancing_open_min = 0;
- instancing_block_size = 0;
- instancing_open_factor = 8.0f;
- instancing_open_max_depth = 32;
- instancing_open_max = 50000000;
-
- ignore_config_files = false;
- float_exceptions = false;
- quality_flags = -1;
- scene_flags = -1;
- verbose = 0;
- benchmark = 0;
-
- numThreads = 0;
- numUserThreads = 0;
-
-#if TASKING_INTERNAL
- set_affinity = true;
-#else
- set_affinity = false;
-#endif
- /* per default enable affinity on KNL */
- if (hasISA(AVX512KNL)) set_affinity = true;
-
- start_threads = false;
- enable_selockmemoryprivilege = false;
-#if defined(__LINUX__)
- hugepages = true;
-#else
- hugepages = false;
-#endif
- hugepages_success = true;
-
- alloc_main_block_size = 0;
- alloc_num_main_slots = 0;
- alloc_thread_block_size = 0;
- alloc_single_thread_alloc = -1;
-
- error_function = nullptr;
- error_function_userptr = nullptr;
-
- memory_monitor_function = nullptr;
- memory_monitor_userptr = nullptr;
- }
-
- State::~State() {
- }
-
- bool State::hasISA(const int isa) {
- return (enabled_cpu_features & isa) == isa;
- }
-
- bool State::checkISASupport() {
-#if defined(__ARM_NEON)
- /*
- * NEON CPU type is a mixture of NEON and SSE2
- */
-
- bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2;
-
- /* this will be true when explicitly initialize Device with `isa=neon` config */
- bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON;
-
- return hasSSE2 || hasNEON;
-#else
- return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features;
-#endif
- }
-
- void State::verify()
- {
- /* verify that calculations stay in range */
- assert(rcp(min_rcp_input)*FLT_LARGE+FLT_LARGE < 0.01f*FLT_MAX);
-
- /* here we verify that CPP files compiled for a specific ISA only
- * call that same or lower ISA version of non-inlined class member
- * functions */
-#if defined(DEBUG)
-#if defined(EMBREE_TARGET_SSE2)
-#if !defined(__ARM_NEON)
- assert(sse2::getISA() <= SSE2);
-#endif
-#endif
-#if defined(EMBREE_TARGET_SSE42)
- assert(sse42::getISA() <= SSE42);
-#endif
-#if defined(EMBREE_TARGET_AVX)
- assert(avx::getISA() <= AVX);
-#endif
-#if defined(EMBREE_TARGET_AVX2)
- assert(avx2::getISA() <= AVX2);
-#endif
-#if defined (EMBREE_TARGET_AVX512KNL)
- assert(avx512knl::getISA() <= AVX512KNL);
-#endif
-#if defined (EMBREE_TARGET_AVX512SKX)
- assert(avx512skx::getISA() <= AVX512SKX);
-#endif
-#endif
- }
-
- const char* symbols[3] = { "=", ",", "|" };
-
- bool State::parseFile(const FileName& fileName)
- {
- FILE* f = fopen(fileName.c_str(),"r");
- if (!f) return false;
- Ref<Stream<int> > file = new FileStream(f,fileName);
-
- std::vector<std::string> syms;
- for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
- syms.push_back(symbols[i]);
-
- Ref<TokenStream> cin = new TokenStream(new LineCommentFilter(file,"#"),
- TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
- TokenStream::separators,syms);
- parse(cin);
- return true;
- }
-
- void State::parseString(const char* cfg)
- {
- if (cfg == nullptr) return;
-
- std::vector<std::string> syms;
- for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
- syms.push_back(symbols[i]);
-
- Ref<TokenStream> cin = new TokenStream(new StrStream(cfg),
- TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
- TokenStream::separators,syms);
- parse(cin);
- }
-
- int string_to_cpufeatures(const std::string& isa)
- {
- if (isa == "sse" ) return SSE;
- else if (isa == "sse2") return SSE2;
- else if (isa == "sse3") return SSE3;
- else if (isa == "ssse3") return SSSE3;
- else if (isa == "sse41") return SSE41;
- else if (isa == "sse4.1") return SSE41;
- else if (isa == "sse42") return SSE42;
- else if (isa == "sse4.2") return SSE42;
- else if (isa == "avx") return AVX;
- else if (isa == "avxi") return AVXI;
- else if (isa == "avx2") return AVX2;
- else if (isa == "avx512knl") return AVX512KNL;
- else if (isa == "avx512skx") return AVX512SKX;
- else return SSE2;
- }
-
- void State::parse(Ref<TokenStream> cin)
- {
- /* parse until end of stream */
- while (cin->peek() != Token::Eof())
- {
- const Token tok = cin->get();
-
- if (tok == Token::Id("threads") && cin->trySymbol("="))
- numThreads = cin->get().Int();
-
- else if (tok == Token::Id("user_threads")&& cin->trySymbol("="))
- numUserThreads = cin->get().Int();
-
- else if (tok == Token::Id("set_affinity")&& cin->trySymbol("="))
- set_affinity = cin->get().Int();
-
- else if (tok == Token::Id("affinity")&& cin->trySymbol("="))
- set_affinity = cin->get().Int();
-
- else if (tok == Token::Id("start_threads")&& cin->trySymbol("="))
- start_threads = cin->get().Int();
-
- else if (tok == Token::Id("isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_cpu_features = string_to_cpufeatures(isa);
- enabled_builder_cpu_features = enabled_cpu_features;
- }
-
- else if (tok == Token::Id("max_isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_cpu_features &= string_to_cpufeatures(isa);
- enabled_builder_cpu_features &= enabled_cpu_features;
- }
-
- else if (tok == Token::Id("max_builder_isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_builder_cpu_features &= string_to_cpufeatures(isa);
- }
-
- else if (tok == Token::Id("frequency_level") && cin->trySymbol("=")) {
- std::string freq = cin->get().Identifier();
- if (freq == "simd128") frequency_level = FREQUENCY_SIMD128;
- else if (freq == "simd256") frequency_level = FREQUENCY_SIMD256;
- else if (freq == "simd512") frequency_level = FREQUENCY_SIMD512;
- }
-
- else if (tok == Token::Id("enable_selockmemoryprivilege") && cin->trySymbol("=")) {
- enable_selockmemoryprivilege = cin->get().Int();
- }
- else if (tok == Token::Id("hugepages") && cin->trySymbol("=")) {
- hugepages = cin->get().Int();
- }
-
- else if (tok == Token::Id("ignore_config_files") && cin->trySymbol("="))
- ignore_config_files = cin->get().Int();
- else if (tok == Token::Id("float_exceptions") && cin->trySymbol("="))
- float_exceptions = cin->get().Int();
-
- else if ((tok == Token::Id("tri_accel") || tok == Token::Id("accel")) && cin->trySymbol("="))
- tri_accel = cin->get().Identifier();
- else if ((tok == Token::Id("tri_builder") || tok == Token::Id("builder")) && cin->trySymbol("="))
- tri_builder = cin->get().Identifier();
- else if ((tok == Token::Id("tri_traverser") || tok == Token::Id("traverser")) && cin->trySymbol("="))
- tri_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("tri_accel_mb") || tok == Token::Id("accel_mb")) && cin->trySymbol("="))
- tri_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("tri_builder_mb") || tok == Token::Id("builder_mb")) && cin->trySymbol("="))
- tri_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("tri_traverser_mb") || tok == Token::Id("traverser_mb")) && cin->trySymbol("="))
- tri_traverser_mb = cin->get().Identifier();
-
- else if ((tok == Token::Id("quad_accel")) && cin->trySymbol("="))
- quad_accel = cin->get().Identifier();
- else if ((tok == Token::Id("quad_builder")) && cin->trySymbol("="))
- quad_builder = cin->get().Identifier();
- else if ((tok == Token::Id("quad_traverser")) && cin->trySymbol("="))
- quad_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("quad_accel_mb")) && cin->trySymbol("="))
- quad_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("quad_builder_mb")) && cin->trySymbol("="))
- quad_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("quad_traverser_mb")) && cin->trySymbol("="))
- quad_traverser_mb = cin->get().Identifier();
-
- else if ((tok == Token::Id("line_accel")) && cin->trySymbol("="))
- line_accel = cin->get().Identifier();
- else if ((tok == Token::Id("line_builder")) && cin->trySymbol("="))
- line_builder = cin->get().Identifier();
- else if ((tok == Token::Id("line_traverser")) && cin->trySymbol("="))
- line_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("line_accel_mb")) && cin->trySymbol("="))
- line_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("line_builder_mb")) && cin->trySymbol("="))
- line_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("line_traverser_mb")) && cin->trySymbol("="))
- line_traverser_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("hair_accel") && cin->trySymbol("="))
- hair_accel = cin->get().Identifier();
- else if (tok == Token::Id("hair_builder") && cin->trySymbol("="))
- hair_builder = cin->get().Identifier();
- else if (tok == Token::Id("hair_traverser") && cin->trySymbol("="))
- hair_traverser = cin->get().Identifier();
-
- else if (tok == Token::Id("hair_accel_mb") && cin->trySymbol("="))
- hair_accel_mb = cin->get().Identifier();
- else if (tok == Token::Id("hair_builder_mb") && cin->trySymbol("="))
- hair_builder_mb = cin->get().Identifier();
- else if (tok == Token::Id("hair_traverser_mb") && cin->trySymbol("="))
- hair_traverser_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("object_accel") && cin->trySymbol("="))
- object_accel = cin->get().Identifier();
- else if (tok == Token::Id("object_builder") && cin->trySymbol("="))
- object_builder = cin->get().Identifier();
- else if (tok == Token::Id("object_accel_min_leaf_size") && cin->trySymbol("="))
- object_accel_min_leaf_size = cin->get().Int();
- else if (tok == Token::Id("object_accel_max_leaf_size") && cin->trySymbol("="))
- object_accel_max_leaf_size = cin->get().Int();
-
- else if (tok == Token::Id("object_accel_mb") && cin->trySymbol("="))
- object_accel_mb = cin->get().Identifier();
- else if (tok == Token::Id("object_builder_mb") && cin->trySymbol("="))
- object_builder_mb = cin->get().Identifier();
- else if (tok == Token::Id("object_accel_mb_min_leaf_size") && cin->trySymbol("="))
- object_accel_mb_min_leaf_size = cin->get().Int();
- else if (tok == Token::Id("object_accel_mb_max_leaf_size") && cin->trySymbol("="))
- object_accel_mb_max_leaf_size = cin->get().Int();
-
- else if (tok == Token::Id("instancing_open_min") && cin->trySymbol("="))
- instancing_open_min = cin->get().Int();
- else if (tok == Token::Id("instancing_block_size") && cin->trySymbol("=")) {
- instancing_block_size = cin->get().Int();
- instancing_open_factor = 0.0f;
- }
- else if (tok == Token::Id("instancing_open_max_depth") && cin->trySymbol("="))
- instancing_open_max_depth = cin->get().Int();
- else if (tok == Token::Id("instancing_open_factor") && cin->trySymbol("=")) {
- instancing_block_size = 0;
- instancing_open_factor = cin->get().Float();
- }
- else if (tok == Token::Id("instancing_open_max") && cin->trySymbol("="))
- instancing_open_max = cin->get().Int();
-
- else if (tok == Token::Id("subdiv_accel") && cin->trySymbol("="))
- subdiv_accel = cin->get().Identifier();
- else if (tok == Token::Id("subdiv_accel_mb") && cin->trySymbol("="))
- subdiv_accel_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("grid_accel") && cin->trySymbol("="))
- grid_accel = cin->get().Identifier();
- else if (tok == Token::Id("grid_accel_mb") && cin->trySymbol("="))
- grid_accel_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("verbose") && cin->trySymbol("="))
- verbose = cin->get().Int();
- else if (tok == Token::Id("benchmark") && cin->trySymbol("="))
- benchmark = cin->get().Int();
-
- else if (tok == Token::Id("quality")) {
- if (cin->trySymbol("=")) {
- Token flag = cin->get();
- if (flag == Token::Id("low")) quality_flags = RTC_BUILD_QUALITY_LOW;
- else if (flag == Token::Id("medium")) quality_flags = RTC_BUILD_QUALITY_MEDIUM;
- else if (flag == Token::Id("high")) quality_flags = RTC_BUILD_QUALITY_HIGH;
- }
- }
-
- else if (tok == Token::Id("scene_flags")) {
- scene_flags = 0;
- if (cin->trySymbol("=")) {
- do {
- Token flag = cin->get();
- if (flag == Token::Id("dynamic") ) scene_flags |= RTC_SCENE_FLAG_DYNAMIC;
- else if (flag == Token::Id("compact")) scene_flags |= RTC_SCENE_FLAG_COMPACT;
- else if (flag == Token::Id("robust")) scene_flags |= RTC_SCENE_FLAG_ROBUST;
- } while (cin->trySymbol("|"));
- }
- }
-
- else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("="))
- max_spatial_split_replications = cin->get().Float();
-
- else if (tok == Token::Id("presplits") && cin->trySymbol("="))
- useSpatialPreSplits = cin->get().Int() != 0 ? true : false;
-
- else if (tok == Token::Id("tessellation_cache_size") && cin->trySymbol("="))
- tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
- else if (tok == Token::Id("cache_size") && cin->trySymbol("="))
- tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
-
- else if (tok == Token::Id("alloc_main_block_size") && cin->trySymbol("="))
- alloc_main_block_size = cin->get().Int();
- else if (tok == Token::Id("alloc_num_main_slots") && cin->trySymbol("="))
- alloc_num_main_slots = cin->get().Int();
- else if (tok == Token::Id("alloc_thread_block_size") && cin->trySymbol("="))
- alloc_thread_block_size = cin->get().Int();
- else if (tok == Token::Id("alloc_single_thread_alloc") && cin->trySymbol("="))
- alloc_single_thread_alloc = cin->get().Int();
-
- cin->trySymbol(","); // optional , separator
- }
- }
-
- bool State::verbosity(size_t N) {
- return N <= verbose;
- }
-
- void State::print()
- {
- std::cout << "general:" << std::endl;
- std::cout << " build threads = " << numThreads << std::endl;
- std::cout << " build user threads = " << numUserThreads << std::endl;
- std::cout << " start_threads = " << start_threads << std::endl;
- std::cout << " affinity = " << set_affinity << std::endl;
- std::cout << " frequency_level = ";
- switch (frequency_level) {
- case FREQUENCY_SIMD128: std::cout << "simd128" << std::endl; break;
- case FREQUENCY_SIMD256: std::cout << "simd256" << std::endl; break;
- case FREQUENCY_SIMD512: std::cout << "simd512" << std::endl; break;
- default: std::cout << "error" << std::endl; break;
- }
-
- std::cout << " hugepages = ";
- if (!hugepages) std::cout << "disabled" << std::endl;
- else if (hugepages_success) std::cout << "enabled" << std::endl;
- else std::cout << "failed" << std::endl;
-
- std::cout << " verbosity = " << verbose << std::endl;
- std::cout << " cache_size = " << float(tessellation_cache_size)*1E-6 << " MB" << std::endl;
- std::cout << " max_spatial_split_replications = " << max_spatial_split_replications << std::endl;
-
- std::cout << "triangles:" << std::endl;
- std::cout << " accel = " << tri_accel << std::endl;
- std::cout << " builder = " << tri_builder << std::endl;
- std::cout << " traverser = " << tri_traverser << std::endl;
-
- std::cout << "motion blur triangles:" << std::endl;
- std::cout << " accel = " << tri_accel_mb << std::endl;
- std::cout << " builder = " << tri_builder_mb << std::endl;
- std::cout << " traverser = " << tri_traverser_mb << std::endl;
-
- std::cout << "quads:" << std::endl;
- std::cout << " accel = " << quad_accel << std::endl;
- std::cout << " builder = " << quad_builder << std::endl;
- std::cout << " traverser = " << quad_traverser << std::endl;
-
- std::cout << "motion blur quads:" << std::endl;
- std::cout << " accel = " << quad_accel_mb << std::endl;
- std::cout << " builder = " << quad_builder_mb << std::endl;
- std::cout << " traverser = " << quad_traverser_mb << std::endl;
-
- std::cout << "line segments:" << std::endl;
- std::cout << " accel = " << line_accel << std::endl;
- std::cout << " builder = " << line_builder << std::endl;
- std::cout << " traverser = " << line_traverser << std::endl;
-
- std::cout << "motion blur line segments:" << std::endl;
- std::cout << " accel = " << line_accel_mb << std::endl;
- std::cout << " builder = " << line_builder_mb << std::endl;
- std::cout << " traverser = " << line_traverser_mb << std::endl;
-
- std::cout << "hair:" << std::endl;
- std::cout << " accel = " << hair_accel << std::endl;
- std::cout << " builder = " << hair_builder << std::endl;
- std::cout << " traverser = " << hair_traverser << std::endl;
-
- std::cout << "motion blur hair:" << std::endl;
- std::cout << " accel = " << hair_accel_mb << std::endl;
- std::cout << " builder = " << hair_builder_mb << std::endl;
- std::cout << " traverser = " << hair_traverser_mb << std::endl;
-
- std::cout << "subdivision surfaces:" << std::endl;
- std::cout << " accel = " << subdiv_accel << std::endl;
-
- std::cout << "grids:" << std::endl;
- std::cout << " accel = " << grid_accel << std::endl;
- std::cout << " builder = " << grid_builder << std::endl;
-
- std::cout << "motion blur grids:" << std::endl;
- std::cout << " accel = " << grid_accel_mb << std::endl;
- std::cout << " builder = " << grid_builder_mb << std::endl;
-
- std::cout << "object_accel:" << std::endl;
- std::cout << " min_leaf_size = " << object_accel_min_leaf_size << std::endl;
- std::cout << " max_leaf_size = " << object_accel_max_leaf_size << std::endl;
-
- std::cout << "object_accel_mb:" << std::endl;
- std::cout << " min_leaf_size = " << object_accel_mb_min_leaf_size << std::endl;
- std::cout << " max_leaf_size = " << object_accel_mb_max_leaf_size << std::endl;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/state.h b/thirdparty/embree-aarch64/kernels/common/state.h
deleted file mode 100644
index d0fccc023f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/state.h
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /* mutex to make printing to cout thread safe */
- extern MutexSys g_printMutex;
-
- struct State : public RefCount
- {
- public:
- /*! state construction */
- State ();
-
- /*! state destruction */
- ~State();
-
- /*! verifies that state is correct */
- void verify();
-
- /*! parses state from a configuration file */
- bool parseFile(const FileName& fileName);
-
- /*! parses the state from a string */
- void parseString(const char* cfg);
-
- /*! parses the state from a stream */
- void parse(Ref<TokenStream> cin);
-
- /*! prints the state */
- void print();
-
- /*! checks if verbosity level is at least N */
- bool verbosity(size_t N);
-
- /*! checks if some particular ISA is enabled */
- bool hasISA(const int isa);
-
- /*! check whether selected ISA is supported by the HW */
- bool checkISASupport();
-
- public:
- std::string tri_accel; //!< acceleration structure to use for triangles
- std::string tri_builder; //!< builder to use for triangles
- std::string tri_traverser; //!< traverser to use for triangles
-
- public:
- std::string tri_accel_mb; //!< acceleration structure to use for motion blur triangles
- std::string tri_builder_mb; //!< builder to use for motion blur triangles
- std::string tri_traverser_mb; //!< traverser to use for triangles
-
- public:
- std::string quad_accel; //!< acceleration structure to use for quads
- std::string quad_builder; //!< builder to use for quads
- std::string quad_traverser; //!< traverser to use for quads
-
- public:
- std::string quad_accel_mb; //!< acceleration structure to use for motion blur quads
- std::string quad_builder_mb; //!< builder to use for motion blur quads
- std::string quad_traverser_mb; //!< traverser to use for motion blur quads
-
- public:
- std::string line_accel; //!< acceleration structure to use for line segments
- std::string line_builder; //!< builder to use for line segments
- std::string line_traverser; //!< traverser to use for line segments
-
- public:
- std::string line_accel_mb; //!< acceleration structure to use for motion blur line segments
- std::string line_builder_mb; //!< builder to use for motion blur line segments
- std::string line_traverser_mb; //!< traverser to use for motion blur line segments
-
- public:
- std::string hair_accel; //!< hair acceleration structure to use
- std::string hair_builder; //!< builder to use for hair
- std::string hair_traverser; //!< traverser to use for hair
-
- public:
- std::string hair_accel_mb; //!< acceleration structure to use for motion blur hair
- std::string hair_builder_mb; //!< builder to use for motion blur hair
- std::string hair_traverser_mb; //!< traverser to use for motion blur hair
-
- public:
- std::string object_accel; //!< acceleration structure for user geometries
- std::string object_builder; //!< builder for user geometries
- int object_accel_min_leaf_size; //!< minimum leaf size for object acceleration structure
- int object_accel_max_leaf_size; //!< maximum leaf size for object acceleration structure
-
- public:
- std::string object_accel_mb; //!< acceleration structure for user geometries
- std::string object_builder_mb; //!< builder for user geometries
- int object_accel_mb_min_leaf_size; //!< minimum leaf size for mblur object acceleration structure
- int object_accel_mb_max_leaf_size; //!< maximum leaf size for mblur object acceleration structure
-
- public:
- std::string subdiv_accel; //!< acceleration structure to use for subdivision surfaces
- std::string subdiv_accel_mb; //!< acceleration structure to use for subdivision surfaces
-
- public:
- std::string grid_accel; //!< acceleration structure to use for grids
- std::string grid_builder; //!< builder for grids
- std::string grid_accel_mb; //!< acceleration structure to use for motion blur grids
- std::string grid_builder_mb; //!< builder for motion blur grids
-
- public:
- float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits
- bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder
- size_t tessellation_cache_size; //!< size of the shared tessellation cache
-
- public:
- size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees
- size_t instancing_block_size; //!< instancing opens tree up to average block size of primitives
- float instancing_open_factor; //!< instancing opens tree up to x times the number of instances
- size_t instancing_open_max_depth; //!< maximum open depth for geometries
- size_t instancing_open_max; //!< instancing opens tree to maximally that number of subtrees
-
- public:
- bool ignore_config_files; //!< if true no more config files get parse
- bool float_exceptions; //!< enable floating point exceptions
- int quality_flags;
- int scene_flags;
- size_t verbose; //!< verbosity of output
- size_t benchmark; //!< true
-
- public:
- size_t numThreads; //!< number of threads to use in builders
- size_t numUserThreads; //!< number of user provided threads to use in builders
- bool set_affinity; //!< sets affinity for worker threads
- bool start_threads; //!< true when threads should be started at device creation time
- int enabled_cpu_features; //!< CPU ISA features to use
- int enabled_builder_cpu_features; //!< CPU ISA features to use for builders only
- enum FREQUENCY_LEVEL {
- FREQUENCY_SIMD128,
- FREQUENCY_SIMD256,
- FREQUENCY_SIMD512
- } frequency_level; //!< frequency level the app wants to run on (default is SIMD256)
- bool enable_selockmemoryprivilege; //!< configures the SeLockMemoryPrivilege under Windows to enable huge pages
- bool hugepages; //!< true if huge pages should get used
- bool hugepages_success; //!< status for enabling huge pages
-
- public:
- size_t alloc_main_block_size; //!< main allocation block size (shared between threads)
- int alloc_num_main_slots; //!< number of such shared blocks to be used to allocate
- size_t alloc_thread_block_size; //!< size of thread local allocator block size
- int alloc_single_thread_alloc; //!< in single mode nodes and leaves use same thread local allocator
-
- public:
-
- /*! checks if we can use AVX */
- bool canUseAVX() {
- return hasISA(AVX) && frequency_level != FREQUENCY_SIMD128;
- }
-
- /*! checks if we can use AVX2 */
- bool canUseAVX2() {
- return hasISA(AVX2) && frequency_level != FREQUENCY_SIMD128;
- }
-
- struct ErrorHandler
- {
- public:
- ErrorHandler();
- ~ErrorHandler();
- RTCError* error();
-
- public:
- tls_t thread_error;
- std::vector<RTCError*> thread_errors;
- MutexSys errors_mutex;
- };
- ErrorHandler errorHandler;
- static ErrorHandler g_errorHandler;
-
- public:
- void setErrorFunction(RTCErrorFunction fptr, void* uptr)
- {
- error_function = fptr;
- error_function_userptr = uptr;
- }
-
- RTCErrorFunction error_function;
- void* error_function_userptr;
-
- public:
- void setMemoryMonitorFunction(RTCMemoryMonitorFunction fptr, void* uptr)
- {
- memory_monitor_function = fptr;
- memory_monitor_userptr = uptr;
- }
-
- RTCMemoryMonitorFunction memory_monitor_function;
- void* memory_monitor_userptr;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/vector.h b/thirdparty/embree-aarch64/kernels/common/vector.h
deleted file mode 100644
index b478762240..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/vector.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "default.h"
-
-namespace embree
-{
- /*! invokes the memory monitor callback */
- struct MemoryMonitorInterface {
- virtual void memoryMonitor(ssize_t bytes, bool post) = 0;
- };
-
- /*! allocator that performs aligned monitored allocations */
- template<typename T, size_t alignment = 64>
- struct aligned_monitored_allocator
- {
- typedef T value_type;
- typedef T* pointer;
- typedef const T* const_pointer;
- typedef T& reference;
- typedef const T& const_reference;
- typedef std::size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- __forceinline aligned_monitored_allocator(MemoryMonitorInterface* device)
- : device(device), hugepages(false) {}
-
- __forceinline pointer allocate( size_type n )
- {
- if (n) {
- assert(device);
- device->memoryMonitor(n*sizeof(T),false);
- }
- if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M)
- {
- pointer p = (pointer) os_malloc(n*sizeof(value_type),hugepages);
- assert(p);
- return p;
- }
- return (pointer) alignedMalloc(n*sizeof(value_type),alignment);
- }
-
- __forceinline void deallocate( pointer p, size_type n )
- {
- if (p)
- {
- if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M)
- os_free(p,n*sizeof(value_type),hugepages);
- else
- alignedFree(p);
- }
- else assert(n == 0);
-
- if (n) {
- assert(device);
- device->memoryMonitor(-ssize_t(n)*sizeof(T),true);
- }
- }
-
- __forceinline void construct( pointer p, const_reference val ) {
- new (p) T(val);
- }
-
- __forceinline void destroy( pointer p ) {
- p->~T();
- }
-
- private:
- MemoryMonitorInterface* device;
- bool hugepages;
- };
-
- /*! monitored vector */
- template<typename T>
- using mvector = vector_t<T,aligned_monitored_allocator<T,std::alignment_of<T>::value> >;
-}
diff --git a/thirdparty/embree-aarch64/kernels/config.h b/thirdparty/embree-aarch64/kernels/config.h
deleted file mode 100644
index 80a8ab2a56..0000000000
--- a/thirdparty/embree-aarch64/kernels/config.h
+++ /dev/null
@@ -1,76 +0,0 @@
-
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-/* #undef EMBREE_RAY_MASK */
-/* #undef EMBREE_STAT_COUNTERS */
-/* #undef EMBREE_BACKFACE_CULLING */
-/* #undef EMBREE_BACKFACE_CULLING_CURVES */
-#define EMBREE_FILTER_FUNCTION
-/* #undef EMBREE_IGNORE_INVALID_RAYS */
-#define EMBREE_GEOMETRY_TRIANGLE
-/* #undef EMBREE_GEOMETRY_QUAD */
-/* #undef EMBREE_GEOMETRY_CURVE */
-/* #undef EMBREE_GEOMETRY_SUBDIVISION */
-/* #undef EMBREE_GEOMETRY_USER */
-/* #undef EMBREE_GEOMETRY_INSTANCE */
-/* #undef EMBREE_GEOMETRY_GRID */
-/* #undef EMBREE_GEOMETRY_POINT */
-/* #undef EMBREE_RAY_PACKETS */
-/* #undef EMBREE_COMPACT_POLYS */
-
-#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- #define IF_ENABLED_TRIS(x) x
-#else
- #define IF_ENABLED_TRIS(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- #define IF_ENABLED_QUADS(x) x
-#else
- #define IF_ENABLED_QUADS(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
- #define IF_ENABLED_CURVES_OR_POINTS(x) x
-#else
- #define IF_ENABLED_CURVES_OR_POINTS(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_CURVE)
- #define IF_ENABLED_CURVES(x) x
-#else
- #define IF_ENABLED_CURVES(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_POINT)
- #define IF_ENABLED_POINTS(x) x
-#else
- #define IF_ENABLED_POINTS(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- #define IF_ENABLED_SUBDIV(x) x
-#else
- #define IF_ENABLED_SUBDIV(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- #define IF_ENABLED_USER(x) x
-#else
- #define IF_ENABLED_USER(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- #define IF_ENABLED_INSTANCE(x) x
-#else
- #define IF_ENABLED_INSTANCE(x)
-#endif
-
-#if defined(EMBREE_GEOMETRY_GRID)
- #define IF_ENABLED_GRIDS(x) x
-#else
- #define IF_ENABLED_GRIDS(x)
-#endif
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cone.h b/thirdparty/embree-aarch64/kernels/geometry/cone.h
deleted file mode 100644
index 961ef86160..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/cone.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- struct Cone
- {
- const Vec3fa p0; //!< start position of cone
- const Vec3fa p1; //!< end position of cone
- const float r0; //!< start radius of cone
- const float r1; //!< end radius of cone
-
- __forceinline Cone(const Vec3fa& p0, const float r0, const Vec3fa& p1, const float r1)
- : p0(p0), p1(p1), r0(r0), r1(r1) {}
-
- __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir,
- BBox1f& t_o,
- float& u0_o, Vec3fa& Ng0_o,
- float& u1_o, Vec3fa& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const Vec3fa v0 = p0-org;
- const Vec3fa v1 = p1-org;
-
- const float rl = rcp_length(v1-v0);
- const Vec3fa P0 = v0, dP = (v1-v0)*rl;
- const float dr = (r1-r0)*rl;
- const Vec3fa O = -P0, dO = dir;
-
- const float dOdO = dot(dO,dO);
- const float OdO = dot(dO,O);
- const float OO = dot(O,O);
- const float dOz = dot(dP,dO);
- const float Oz = dot(dP,O);
-
- const float R = r0 + Oz*dr;
- const float A = dOdO - sqr(dOz) * (1.0f+sqr(dr));
- const float B = 2.0f * (OdO - dOz*(Oz + R*dr));
- const float C = OO - (sqr(Oz) + sqr(R));
-
- /* we miss the cone if determinant is smaller than zero */
- const float D = B*B - 4.0f*A*C;
- if (D < 0.0f) return false;
-
- /* special case for rays that are "parallel" to the cone */
- const float eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- if (unlikely(abs(A) < eps))
- {
- /* cylinder case */
- if (abs(dr) < 16.0f*float(ulp)) {
- if (C <= 0.0f) { t_o = BBox1f(neg_inf,pos_inf); return true; }
- else { t_o = BBox1f(pos_inf,neg_inf); return false; }
- }
-
- /* cone case */
- else
- {
- /* if we hit the negative cone there cannot be a hit */
- const float t = -C/B;
- const float z0 = Oz+t*dOz;
- const float z0r = r0+z0*dr;
- if (z0r < 0.0f) return false;
-
- /* test if we start inside or outside the cone */
- if (dOz*dr > 0.0f) t_o = BBox1f(t,pos_inf);
- else t_o = BBox1f(neg_inf,t);
- }
- }
-
- /* standard case for "non-parallel" rays */
- else
- {
- const float Q = sqrt(D);
- const float rcp_2A = rcp(2.0f*A);
- t_o.lower = (-B-Q)*rcp_2A;
- t_o.upper = (-B+Q)*rcp_2A;
-
- /* standard case where both hits are on same cone */
- if (likely(A > 0.0f)) {
- const float z0 = Oz+t_o.lower*dOz;
- const float z0r = r0+z0*dr;
- if (z0r < 0.0f) return false;
- }
-
- /* special case where the hits are on the positive and negative cone */
- else
- {
- /* depending on the ray direction and the open direction
- * of the cone we have a hit from inside or outside the
- * cone */
- if (dOz*dr > 0) t_o.upper = pos_inf;
- else t_o.lower = neg_inf;
- }
- }
-
- /* calculates u and Ng for near hit */
- {
- u0_o = (Oz+t_o.lower*dOz)*rl;
- const Vec3fa Pr = t_o.lower*dir;
- const Vec3fa Pl = v0 + u0_o*(v1-v0);
- const Vec3fa R = normalize(Pr-Pl);
- const Vec3fa U = (p1-p0)+(r1-r0)*R;
- const Vec3fa V = cross(p1-p0,R);
- Ng0_o = cross(V,U);
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = (Oz+t_o.upper*dOz)*rl;
- const Vec3fa Pr = t_o.upper*dir;
- const Vec3fa Pl = v0 + u1_o*(v1-v0);
- const Vec3fa R = normalize(Pr-Pl);
- const Vec3fa U = (p1-p0)+(r1-r0)*R;
- const Vec3fa V = cross(p1-p0,R);
- Ng1_o = cross(V,U);
- }
- return true;
- }
-
- __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, BBox1f& t_o) const
- {
- float u0_o; Vec3fa Ng0_o; float u1_o; Vec3fa Ng1_o;
- return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
-
- static bool verify(const size_t id, const Cone& cone, const Ray& ray, bool shouldhit, const float t0, const float t1)
- {
- float eps = 0.001f;
- BBox1f t; bool hit;
- hit = cone.intersect(ray.org,ray.dir,t);
-
- bool failed = hit != shouldhit;
- if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : (t0 == -1E6) ? t.lower > -1E6f : abs(t0-t.lower) > eps;
- if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : (t1 == +1E6) ? t.upper < +1E6f : abs(t1-t.upper) > eps;
- if (!failed) return true;
- embree_cout << "Cone test " << id << " failed: cone = " << cone << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
- return false;
- }
-
- /* verify cone class */
- static bool verify()
- {
- bool passed = true;
- const Cone cone0(Vec3fa(0.0f,0.0f,0.0f),0.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
- passed &= verify(0,cone0,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,3.0f,pos_inf);
- passed &= verify(1,cone0,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
- passed &= verify(2,cone0,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),false,0.0f,0.0f);
- passed &= verify(3,cone0,Ray(Vec3fa(+1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,3.0f);
- passed &= verify(4,cone0,Ray(Vec3fa(-1.0f,0.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,1.0f,pos_inf);
- passed &= verify(5,cone0,Ray(Vec3fa(+1.0f,0.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
- passed &= verify(6,cone0,Ray(Vec3fa(+0.0f,0.0f,1.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,1.0f);
- passed &= verify(7,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
- passed &= verify(8,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(+1.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.5f,+1E6);
- passed &= verify(9,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,+1.0f,+0.0f),0.0f,float(inf)),true,-1E6,-0.5f);
- const Cone cone1(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),0.0f);
- passed &= verify(10,cone1,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,2.0f);
- passed &= verify(11,cone1,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,0.0f,4.0f);
- const Cone cylinder(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
- passed &= verify(12,cylinder,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(13,cylinder,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(14,cylinder,Ray(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
- passed &= verify(15,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(16,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(17,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- passed &= verify(18,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- return passed;
- }
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cone& c) {
- return cout << "Cone { p0 = " << c.p0 << ", r0 = " << c.r0 << ", p1 = " << c.p1 << ", r1 = " << c.r1 << "}";
- }
- };
-
- template<int N>
- struct ConeN
- {
- typedef Vec3<vfloat<N>> Vec3vfN;
-
- const Vec3vfN p0; //!< start position of cone
- const Vec3vfN p1; //!< end position of cone
- const vfloat<N> r0; //!< start radius of cone
- const vfloat<N> r1; //!< end radius of cone
-
- __forceinline ConeN(const Vec3vfN& p0, const vfloat<N>& r0, const Vec3vfN& p1, const vfloat<N>& r1)
- : p0(p0), p1(p1), r0(r0), r1(r1) {}
-
- __forceinline Cone operator[] (const size_t i) const
- {
- assert(i<N);
- return Cone(Vec3fa(p0.x[i],p0.y[i],p0.z[i]),r0[i],Vec3fa(p1.x[i],p1.y[i],p1.z[i]),r1[i]);
- }
-
- __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
- BBox<vfloat<N>>& t_o,
- vfloat<N>& u0_o, Vec3vfN& Ng0_o,
- vfloat<N>& u1_o, Vec3vfN& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const Vec3vfN v0 = p0-Vec3vfN(org);
- const Vec3vfN v1 = p1-Vec3vfN(org);
-
- const vfloat<N> rl = rcp_length(v1-v0);
- const Vec3vfN P0 = v0, dP = (v1-v0)*rl;
- const vfloat<N> dr = (r1-r0)*rl;
- const Vec3vfN O = -P0, dO = dir;
-
- const vfloat<N> dOdO = dot(dO,dO);
- const vfloat<N> OdO = dot(dO,O);
- const vfloat<N> OO = dot(O,O);
- const vfloat<N> dOz = dot(dP,dO);
- const vfloat<N> Oz = dot(dP,O);
-
- const vfloat<N> R = r0 + Oz*dr;
- const vfloat<N> A = dOdO - sqr(dOz) * (vfloat<N>(1.0f)+sqr(dr));
- const vfloat<N> B = 2.0f * (OdO - dOz*(Oz + R*dr));
- const vfloat<N> C = OO - (sqr(Oz) + sqr(R));
-
- /* we miss the cone if determinant is smaller than zero */
- const vfloat<N> D = B*B - 4.0f*A*C;
- vbool<N> valid = D >= 0.0f;
- if (none(valid)) return valid;
-
- /* special case for rays that are "parallel" to the cone */
- const vfloat<N> eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- const vbool<N> validt = valid & (abs(A) < eps);
- const vbool<N> validf = valid & !(abs(A) < eps);
- if (unlikely(any(validt)))
- {
- const vboolx validtt = validt & (abs(dr) < 16.0f*float(ulp));
- const vboolx validtf = validt & (abs(dr) >= 16.0f*float(ulp));
-
- /* cylinder case */
- if (unlikely(any(validtt)))
- {
- t_o.lower = select(validtt, select(C <= 0.0f, vfloat<N>(neg_inf), vfloat<N>(pos_inf)), t_o.lower);
- t_o.upper = select(validtt, select(C <= 0.0f, vfloat<N>(pos_inf), vfloat<N>(neg_inf)), t_o.upper);
- valid &= !validtt | C <= 0.0f;
- }
-
- /* cone case */
- if (any(validtf))
- {
- /* if we hit the negative cone there cannot be a hit */
- const vfloat<N> t = -C/B;
- const vfloat<N> z0 = Oz+t*dOz;
- const vfloat<N> z0r = r0+z0*dr;
- valid &= !validtf | z0r >= 0.0f;
-
- /* test if we start inside or outside the cone */
- t_o.lower = select(validtf, select(dOz*dr > 0.0f, t, vfloat<N>(neg_inf)), t_o.lower);
- t_o.upper = select(validtf, select(dOz*dr > 0.0f, vfloat<N>(pos_inf), t), t_o.upper);
- }
- }
-
- /* standard case for "non-parallel" rays */
- if (likely(any(validf)))
- {
- const vfloat<N> Q = sqrt(D);
- const vfloat<N> rcp_2A = 0.5f*rcp(A);
- t_o.lower = select(validf, (-B-Q)*rcp_2A, t_o.lower);
- t_o.upper = select(validf, (-B+Q)*rcp_2A, t_o.upper);
-
- /* standard case where both hits are on same cone */
- const vbool<N> validft = validf & A>0.0f;
- const vbool<N> validff = validf & !(A>0.0f);
- if (any(validft)) {
- const vfloat<N> z0 = Oz+t_o.lower*dOz;
- const vfloat<N> z0r = r0+z0*dr;
- valid &= !validft | z0r >= 0.0f;
- }
-
- /* special case where the hits are on the positive and negative cone */
- if (any(validff)) {
- /* depending on the ray direction and the open direction
- * of the cone we have a hit from inside or outside the
- * cone */
- t_o.lower = select(validff, select(dOz*dr > 0.0f, t_o.lower, float(neg_inf)), t_o.lower);
- t_o.upper = select(validff, select(dOz*dr > 0.0f, float(pos_inf), t_o.upper), t_o.upper);
- }
- }
-
- /* calculates u and Ng for near hit */
- {
- u0_o = (Oz+t_o.lower*dOz)*rl;
- const Vec3vfN Pr = t_o.lower*Vec3vfN(dir);
- const Vec3vfN Pl = v0 + u0_o*(v1-v0);
- const Vec3vfN R = normalize(Pr-Pl);
- const Vec3vfN U = (p1-p0)+(r1-r0)*R;
- const Vec3vfN V = cross(p1-p0,R);
- Ng0_o = cross(V,U);
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = (Oz+t_o.upper*dOz)*rl;
- const Vec3vfN Pr = t_o.lower*Vec3vfN(dir);
- const Vec3vfN Pl = v0 + u1_o*(v1-v0);
- const Vec3vfN R = normalize(Pr-Pl);
- const Vec3vfN U = (p1-p0)+(r1-r0)*R;
- const Vec3vfN V = cross(p1-p0,R);
- Ng1_o = cross(V,U);
- }
- return valid;
- }
-
- __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
- {
- vfloat<N> u0_o; Vec3vfN Ng0_o; vfloat<N> u1_o; Vec3vfN Ng1_o;
- return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h
deleted file mode 100644
index 0902baff7d..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h
+++ /dev/null
@@ -1,209 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- namespace __coneline_internal
- {
- template<int M, typename Epilog, typename ray_tfar_func>
- static __forceinline bool intersectCone(const vbool<M>& valid_i,
- const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
- const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar,
- const Vec4vf<M>& v0, const Vec4vf<M>& v1,
- const vbool<M>& cL, const vbool<M>& cR,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- /* move ray origin closer to make calculations numerically stable */
- const vfloat<M> dOdO = sqr(ray_dir);
- const vfloat<M> rcp_dOdO = rcp(dOdO);
- const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
- const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO;
- const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir;
-
- const Vec3vf<M> dP = v1.xyz() - v0.xyz();
- const Vec3vf<M> p0 = ray_org - v0.xyz();
- const Vec3vf<M> p1 = ray_org - v1.xyz();
-
- const vfloat<M> dPdP = sqr(dP);
- const vfloat<M> dP0 = dot(p0,dP);
- const vfloat<M> dP1 = dot(p1,dP);
- const vfloat<M> dOdP = dot(ray_dir,dP);
-
- // intersect cone body
- const vfloat<M> dr = v0.w - v1.w;
- const vfloat<M> hy = dPdP + sqr(dr);
- const vfloat<M> dO0 = dot(ray_dir,p0);
- const vfloat<M> OO = sqr(p0);
- const vfloat<M> dPdP2 = sqr(dPdP);
- const vfloat<M> dPdPr0 = dPdP*v0.w;
-
- const vfloat<M> A = dPdP2 - sqr(dOdP)*hy;
- const vfloat<M> B = dPdP2*dO0 - dP0*dOdP*hy + dPdPr0*(dr*dOdP);
- const vfloat<M> C = dPdP2*OO - sqr(dP0)*hy + dPdPr0*(2.0f*dr*dP0 - dPdPr0);
-
- const vfloat<M> D = B*B - A*C;
- valid &= D >= 0.0f;
- if (unlikely(none(valid))) {
- return false;
- }
-
- /* standard case for "non-parallel" rays */
- const vfloat<M> Q = sqrt(D);
- const vfloat<M> rcp_A = rcp(A);
- /* special case for rays that are "parallel" to the cone - assume miss */
- const vbool<M> isParallel = abs(A) <= min_rcp_input;
-
- vfloat<M> t_cone_lower = select (isParallel, neg_inf, (-B-Q)*rcp_A);
- vfloat<M> t_cone_upper = select (isParallel, pos_inf, (-B+Q)*rcp_A);
- const vfloat<M> y_lower = dP0 + t_cone_lower*dOdP;
- const vfloat<M> y_upper = dP0 + t_cone_upper*dOdP;
- t_cone_lower = select(valid & y_lower > 0.0f & y_lower < dPdP, t_cone_lower, pos_inf);
- t_cone_upper = select(valid & y_upper > 0.0f & y_upper < dPdP, t_cone_upper, neg_inf);
-
- const vbool<M> hitDisk0 = valid & cL;
- const vbool<M> hitDisk1 = valid & cR;
- const vfloat<M> rcp_dOdP = rcp(dOdP);
- const vfloat<M> t_disk0 = select (hitDisk0, select (sqr(p0*dOdP-ray_dir*dP0)<(sqr(v0.w)*sqr(dOdP)), -dP0*rcp_dOdP, pos_inf), pos_inf);
- const vfloat<M> t_disk1 = select (hitDisk1, select (sqr(p1*dOdP-ray_dir*dP1)<(sqr(v1.w)*sqr(dOdP)), -dP1*rcp_dOdP, pos_inf), pos_inf);
- const vfloat<M> t_disk_lower = min(t_disk0, t_disk1);
- const vfloat<M> t_disk_upper = max(t_disk0, t_disk1);
-
- const vfloat<M> t_lower = min(t_cone_lower, t_disk_lower);
- const vfloat<M> t_upper = max(t_cone_upper, select(t_lower==t_disk_lower,
- select(t_disk_upper==vfloat<M>(pos_inf),neg_inf,t_disk_upper),
- select(t_disk_lower==vfloat<M>(pos_inf),neg_inf,t_disk_lower)));
-
- const vbool<M> valid_lower = valid & ray_tnear <= dt+t_lower & dt+t_lower <= ray_tfar() & t_lower != vfloat<M>(pos_inf);
- const vbool<M> valid_upper = valid & ray_tnear <= dt+t_upper & dt+t_upper <= ray_tfar() & t_upper != vfloat<M>(neg_inf);
-
- const vbool<M> valid_first = valid_lower | valid_upper;
- if (unlikely(none(valid_first)))
- return false;
-
- const vfloat<M> t_first = select(valid_lower, t_lower, t_upper);
- const vfloat<M> y_first = select(valid_lower, y_lower, y_upper);
-
- const vfloat<M> rcp_dPdP = rcp(dPdP);
- const Vec3vf<M> dP2drr0dP = dPdP*dr*v0.w*dP;
- const Vec3vf<M> dPhy = dP*hy;
- const vbool<M> cone_hit_first = valid & (t_first == t_cone_lower | t_first == t_cone_upper);
- const vbool<M> disk0_hit_first = valid & (t_first == t_disk0);
- const Vec3vf<M> Ng_first = select(cone_hit_first, dPdP2*(p0+t_first*ray_dir)+dP2drr0dP-dPhy*y_first, select(disk0_hit_first, -dP, dP));
- const vfloat<M> u_first = select(cone_hit_first, y_first*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
- /* invoke intersection filter for first hit */
- RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first);
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_upper;
- const vfloat<M> y_second = y_upper;
- const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_upper <= ray_tfar());
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper;
- const vbool<M> disk0_hit_second = t_second == t_disk0;
- const Vec3vf<M> Ng_second = select(cone_hit_second, dPdP2*(p0+t_second*ray_dir)+dP2drr0dP-dPhy*y_second, select(disk0_hit_second, -dP, dP));
- const vfloat<M> u_second = select(cone_hit_second, y_second*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
- hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
- }
- }
-
- template<int M>
- struct ConeLineIntersectorHitM
- {
- __forceinline ConeLineIntersectorHitM() {}
-
- __forceinline ConeLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng) {}
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct ConeCurveIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- struct ray_tfar {
- Ray& ray;
- __forceinline ray_tfar(Ray& ray) : ray(ray) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const vbool<M>& cL, const vbool<M>& cR,
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const vfloat<M> ray_tnear(ray.tnear());
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,cL,cR,epilog);
- }
- };
-
- template<int M, int K>
- struct ConeCurveIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- struct ray_tfar {
- RayK<K>& ray;
- size_t k;
- __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar[k]; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const vbool<M>& cL, const vbool<M>& cR,
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> ray_tnear = ray.tnear()[k];
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,cL,cR,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h
deleted file mode 100644
index d47218eb8b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "coneline_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct ConeCurveMiIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- return false;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct ConeCurveMiMBIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- return false;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct ConeCurveMiIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct ConeCurveMiMBIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1;
- vbool<M> cL,cR;
- line.gather(v0,v1,cL,cR,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi.h
deleted file mode 100644
index 51384f1959..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h
+++ /dev/null
@@ -1,222 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNi
- {
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNi) == 22+25*M, "internal data layout issue");
- return f*sizeof(CurveNi) + (r!=0)*(22 + 25*r);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNi () {}
-
- /*! fill curve from curve list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
- {
- size_t end = min(begin+M,_end);
- N = (uint8_t)(end-begin);
- const unsigned int geomID0 = prims[begin].geomID();
- this->geomID(N) = geomID0;
- ty = (uint8_t) scene->get(geomID0)->getType();
-
- /* encode all primitives */
- BBox3fa bounds = empty;
- for (size_t i=0; i<N; i++)
- {
- const PrimRef& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
- const unsigned int primID = prim.primID();
- bounds.extend(scene->get(geomID)->vbounds(primID));
- }
-
- /* calculate offset and scale */
- Vec3fa loffset = bounds.lower;
- float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
- if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
- *this->offset(N) = loffset;
- *this->scale(N) = lscale;
-
- /* encode all primitives */
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpace(primID);
-
- const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
- const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID);
-
- bounds_vx_x(N)[i] = (int8_t) space3.vx.x;
- bounds_vx_y(N)[i] = (int8_t) space3.vx.y;
- bounds_vx_z(N)[i] = (int8_t) space3.vx.z;
- bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f);
-
- bounds_vy_x(N)[i] = (int8_t) space3.vy.x;
- bounds_vy_y(N)[i] = (int8_t) space3.vy.y;
- bounds_vy_z(N)[i] = (int8_t) space3.vy.z;
- bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f);
-
- bounds_vz_x(N)[i] = (int8_t) space3.vz.x;
- bounds_vz_y(N)[i] = (int8_t) space3.vz.y;
- bounds_vz_z(N)[i] = (int8_t) space3.vz.z;
- bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.z) && ceil (bounds.upper.z) <= 32767.0f);
-
- this->primID(N)[i] = primID;
- }
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = CurveNi::blocks(set.size());
- size_t numbytes = CurveNi::bytes(set.size());
- CurveNi* accel = (CurveNi*) alloc.malloc1(numbytes,BVH::byteAlignment);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((int8_t*)accel,items);
- };
-
- public:
-
- // 27.6 - 46 bytes per primitive
- uint8_t ty;
- uint8_t N;
- uint8_t data[4+25*M+16];
-
- /*
- struct Layout
- {
- unsigned int geomID;
- unsigned int primID[N];
-
- int8_t bounds_vx_x[N];
- int8_t bounds_vx_y[N];
- int8_t bounds_vx_z[N];
- short bounds_vx_lower[N];
- short bounds_vx_upper[N];
-
- int8_t bounds_vy_x[N];
- int8_t bounds_vy_y[N];
- int8_t bounds_vy_z[N];
- short bounds_vy_lower[N];
- short bounds_vy_upper[N];
-
- int8_t bounds_vz_x[N];
- int8_t bounds_vz_y[N];
- int8_t bounds_vz_z[N];
- short bounds_vz_lower[N];
- short bounds_vz_upper[N];
-
- Vec3f offset;
- float scale;
- };
- */
-
- __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); }
- __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); }
-
- __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); }
- __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); }
-
- __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); }
- __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); }
-
- __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); }
- __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); }
-
- __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); }
- __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); }
-
- __forceinline short* bounds_vx_lower(size_t N) { return (short*)((int8_t*)this+6+7*N); }
- __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((int8_t*)this+6+7*N); }
-
- __forceinline short* bounds_vx_upper(size_t N) { return (short*)((int8_t*)this+6+9*N); }
- __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((int8_t*)this+6+9*N); }
-
- __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+11*N); }
- __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+11*N); }
-
- __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+12*N); }
- __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+12*N); }
-
- __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+13*N); }
- __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+13*N); }
-
- __forceinline short* bounds_vy_lower(size_t N) { return (short*)((int8_t*)this+6+14*N); }
- __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((int8_t*)this+6+14*N); }
-
- __forceinline short* bounds_vy_upper(size_t N) { return (short*)((int8_t*)this+6+16*N); }
- __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((int8_t*)this+6+16*N); }
-
- __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+18*N); }
- __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+18*N); }
-
- __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+19*N); }
- __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+19*N); }
-
- __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+20*N); }
- __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+20*N); }
-
- __forceinline short* bounds_vz_lower(size_t N) { return (short*)((int8_t*)this+6+21*N); }
- __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((int8_t*)this+6+21*N); }
-
- __forceinline short* bounds_vz_upper(size_t N) { return (short*)((int8_t*)this+6+23*N); }
- __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((int8_t*)this+6+23*N); }
-
- __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+25*N); }
- __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+25*N); }
-
- __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+25*N+12); }
- __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+25*N+12); }
-
- __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+25*N+16; }
- __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+25*N+16; }
- };
-
- template<int M>
- typename CurveNi<M>::Type CurveNi<M>::type;
-
- typedef CurveNi<4> Curve4i;
- typedef CurveNi<8> Curve8i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h
deleted file mode 100644
index 0f9038c9fc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h
+++ /dev/null
@@ -1,569 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNiIntersector1
- {
- typedef CurveNi<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
- const Vec3fa org1 = (ray.org-offset)*scale;
- const Vec3fa dir1 = ray.dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNiIntersectorK
- {
- typedef CurveNi<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
-
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- const Vec3fa org1 = (ray_org-offset)*scale;
- const Vec3fa dir1 = ray_dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h
deleted file mode 100644
index 0cd8f833fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h
+++ /dev/null
@@ -1,278 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNiMB
- {
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNiMB) == 6+37*M+24, "internal data layout issue");
- return f*sizeof(CurveNiMB) + (r!=0)*(6+37*r+24);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNiMB () {}
-
- /*! fill curve from curve list */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range)
- {
- size_t end = min(begin+M,_end);
- N = (uint8_t)(end-begin);
- const unsigned int geomID0 = prims[begin].geomID();
- this->geomID(N) = geomID0;
- ty = (uint8_t) scene->get(geomID0)->getType();
-
- /* encode all primitives */
- LBBox3fa lbounds = empty;
- for (size_t i=0; i<N; i++)
- {
- const PrimRefMB& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
- const unsigned int primID = prim.primID();
- lbounds.extend(scene->get(geomID)->vlinearBounds(primID,time_range));
- }
- BBox3fa bounds = lbounds.bounds();
-
- /* calculate offset and scale */
- Vec3fa loffset = bounds.lower;
- float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
- if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
- *this->offset(N) = loffset;
- *this->scale(N) = lscale;
- this->time_offset(N) = time_range.lower;
- this->time_scale(N) = 1.0f/time_range.size();
-
- /* encode all primitives */
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRefMB& prim = prims[begin];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpaceMB(primID,time_range);
-
- const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
- const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range);
-
- // NOTE: this weird (int8_t) (short) cast works around VS2015 Win32 compiler bug
- bounds_vx_x(N)[i] = (int8_t) (short) space3.vx.x;
- bounds_vx_y(N)[i] = (int8_t) (short) space3.vx.y;
- bounds_vx_z(N)[i] = (int8_t) (short) space3.vx.z;
- bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f);
- bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.x),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.x) && floor(bounds.bounds0.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.x) && ceil (bounds.bounds0.upper.x) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f);
-
- bounds_vy_x(N)[i] = (int8_t) (short) space3.vy.x;
- bounds_vy_y(N)[i] = (int8_t) (short) space3.vy.y;
- bounds_vy_z(N)[i] = (int8_t) (short) space3.vy.z;
- bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f);
- bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.y),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.y) && floor(bounds.bounds0.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.y) && ceil (bounds.bounds0.upper.y) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f);
-
- bounds_vz_x(N)[i] = (int8_t) (short) space3.vz.x;
- bounds_vz_y(N)[i] = (int8_t) (short) space3.vz.y;
- bounds_vz_z(N)[i] = (int8_t) (short) space3.vz.z;
- bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f);
- bounds_vz_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.z),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.z) && floor(bounds.bounds0.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.z) && ceil (bounds.bounds0.upper.z) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.z) && floor(bounds.bounds1.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.z) && ceil (bounds.bounds1.upper.z) <= 32767.0f);
-
- this->primID(N)[i] = primID;
- }
-
- return lbounds;
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.begin();
- size_t end = prims.end();
- size_t items = CurveNiMB::blocks(prims.size());
- size_t numbytes = CurveNiMB::bytes(prims.size());
- CurveNiMB* accel = (CurveNiMB*) alloc.malloc1(numbytes,BVH::byteAlignment);
- const typename BVH::NodeRef node = bvh->encodeLeaf((int8_t*)accel,items);
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range);
- };
-
-
- public:
-
- // 27.6 - 46 bytes per primitive
- uint8_t ty;
- uint8_t N;
- uint8_t data[4+37*M+24];
-
- /*
- struct Layout
- {
- unsigned int geomID;
- unsigned int primID[N];
-
- int8_t bounds_vx_x[N];
- int8_t bounds_vx_y[N];
- int8_t bounds_vx_z[N];
- short bounds_vx_lower0[N];
- short bounds_vx_upper0[N];
- short bounds_vx_lower1[N];
- short bounds_vx_upper1[N];
-
- int8_t bounds_vy_x[N];
- int8_t bounds_vy_y[N];
- int8_t bounds_vy_z[N];
- short bounds_vy_lower0[N];
- short bounds_vy_upper0[N];
- short bounds_vy_lower1[N];
- short bounds_vy_upper1[N];
-
- int8_t bounds_vz_x[N];
- int8_t bounds_vz_y[N];
- int8_t bounds_vz_z[N];
- short bounds_vz_lower0[N];
- short bounds_vz_upper0[N];
- short bounds_vz_lower1[N];
- short bounds_vz_upper1[N];
-
- Vec3f offset;
- float scale;
-
- float time_offset;
- float time_scale;
- };
- */
-
- __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); }
- __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); }
-
- __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); }
- __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); }
-
- __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); }
- __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); }
-
- __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); }
- __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); }
-
- __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); }
- __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); }
-
- __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((int8_t*)this+6+7*N); }
- __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((int8_t*)this+6+7*N); }
-
- __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((int8_t*)this+6+9*N); }
- __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((int8_t*)this+6+9*N); }
-
- __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((int8_t*)this+6+11*N); }
- __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((int8_t*)this+6+11*N); }
-
- __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((int8_t*)this+6+13*N); }
- __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((int8_t*)this+6+13*N); }
-
- __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+15*N); }
- __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+15*N); }
-
- __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+16*N); }
- __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+16*N); }
-
- __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+17*N); }
- __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+17*N); }
-
- __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((int8_t*)this+6+18*N); }
- __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((int8_t*)this+6+18*N); }
-
- __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((int8_t*)this+6+20*N); }
- __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((int8_t*)this+6+20*N); }
-
- __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((int8_t*)this+6+22*N); }
- __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((int8_t*)this+6+22*N); }
-
- __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((int8_t*)this+6+24*N); }
- __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((int8_t*)this+6+24*N); }
-
- __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+26*N); }
- __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+26*N); }
-
- __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+27*N); }
- __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+27*N); }
-
- __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+28*N); }
- __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+28*N); }
-
- __forceinline short* bounds_vz_lower0(size_t N) { return (short*)((int8_t*)this+6+29*N); }
- __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((int8_t*)this+6+29*N); }
-
- __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((int8_t*)this+6+31*N); }
- __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((int8_t*)this+6+31*N); }
-
- __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((int8_t*)this+6+33*N); }
- __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((int8_t*)this+6+33*N); }
-
- __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((int8_t*)this+6+35*N); }
- __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((int8_t*)this+6+35*N); }
-
- __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+37*N); }
- __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+37*N); }
-
- __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+37*N+12); }
- __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+37*N+12); }
-
- __forceinline float& time_offset(size_t N) { return *(float*)((int8_t*)this+6+37*N+16); }
- __forceinline const float& time_offset(size_t N) const { return *(float*)((int8_t*)this+6+37*N+16); }
-
- __forceinline float& time_scale(size_t N) { return *(float*)((int8_t*)this+6+37*N+20); }
- __forceinline const float& time_scale(size_t N) const { return *(float*)((int8_t*)this+6+37*N+20); }
-
- __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+37*N+24; }
- __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+37*N+24; }
- };
-
- template<int M>
- typename CurveNiMB<M>::Type CurveNiMB<M>::type;
-
- typedef CurveNiMB<4> Curve4iMB;
- typedef CurveNiMB<8> Curve8iMB;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h
deleted file mode 100644
index 0cbc764668..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h
+++ /dev/null
@@ -1,516 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi_mb.h"
-#include "../subdiv/linear_bezier_patch.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNiMBIntersector1
- {
- typedef CurveNiMB<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
- const Vec3fa org1 = (ray.org-offset)*scale;
- const Vec3fa dir1 = ray.dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> ltime = (ray.time()-prim.time_offset(N))*prim.time_scale(N);
- const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
- const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
- const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
- const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
- const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
- const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
-
- const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
- const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
- const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
- const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
- const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
- const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
-
- const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
- const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
- const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
- const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
- const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
- const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
-
- const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time());
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time());
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
-
- if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNiMBIntersectorK
- {
- typedef CurveNiMB<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
-
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- const Vec3fa org1 = (ray_org-offset)*scale;
- const Vec3fa dir1 = ray_dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> ltime = (ray.time()[k]-prim.time_offset(N))*prim.time_scale(N);
- const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
- const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
- const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
- const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
- const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
- const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
-
- const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
- const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
- const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
- const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
- const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
- const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
-
- const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
- const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
- const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
- const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
- const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
- const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
-
- const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]);
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]);
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv.h
deleted file mode 100644
index 6eb5e30b39..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNv : public CurveNi<M>
- {
- using CurveNi<M>::N;
-
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNv) == 22+25*M+4*16*M, "internal data layout issue");
- return f*sizeof(CurveNv) + (r!=0)*(22 + 25*r + 4*16*r);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNv () {}
-
- /*! fill curve from curve list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
- {
- size_t end = min(begin+M,_end);
- size_t N = end-begin;
-
- /* encode all primitives */
- for (size_t i=0; i<N; i++)
- {
- const PrimRef& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- CurveGeometry* mesh = (CurveGeometry*) scene->get(geomID);
- const unsigned vtxID = mesh->curve(primID);
- Vec3fa::storeu(&this->vertices(i,N)[0],mesh->vertex(vtxID+0));
- Vec3fa::storeu(&this->vertices(i,N)[1],mesh->vertex(vtxID+1));
- Vec3fa::storeu(&this->vertices(i,N)[2],mesh->vertex(vtxID+2));
- Vec3fa::storeu(&this->vertices(i,N)[3],mesh->vertex(vtxID+3));
- }
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- if (set.size() == 0)
- return BVH::emptyNode;
-
- /* fall back to CurveNi for oriented curves */
- unsigned int geomID = prims[set.begin()].geomID();
- if (bvh->scene->get(geomID)->getCurveType() == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) {
- return CurveNi<M>::createLeaf(bvh,prims,set,alloc);
- }
- if (bvh->scene->get(geomID)->getCurveBasis() == Geometry::GTY_BASIS_HERMITE) {
- return CurveNi<M>::createLeaf(bvh,prims,set,alloc);
- }
-
- size_t start = set.begin();
- size_t items = CurveNv::blocks(set.size());
- size_t numbytes = CurveNv::bytes(set.size());
- CurveNv* accel = (CurveNv*) alloc.malloc1(numbytes,BVH::byteAlignment);
- for (size_t i=0; i<items; i++) {
- accel[i].CurveNv<M>::fill(prims,start,set.end(),bvh->scene);
- accel[i].CurveNi<M>::fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel,items);
- };
-
- public:
- unsigned char data[4*16*M];
- __forceinline Vec3fa* vertices(size_t i, size_t N) { return (Vec3fa*)CurveNi<M>::end(N)+4*i; }
- __forceinline const Vec3fa* vertices(size_t i, size_t N) const { return (Vec3fa*)CurveNi<M>::end(N)+4*i; }
- };
-
- template<int M>
- typename CurveNv<M>::Type CurveNv<M>::type;
-
- typedef CurveNv<4> Curve4v;
- typedef CurveNv<8> Curve8v;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h
deleted file mode 100644
index e20da2882e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNv.h"
-#include "curveNi_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNvIntersector1 : public CurveNiIntersector1<M>
- {
- typedef CurveNv<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNvIntersectorK : public CurveNiIntersectorK<M,K>
- {
- typedef CurveNv<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h
deleted file mode 100644
index 204958f7cc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../subdiv/bezier_curve.h"
-#include "../common/primref.h"
-#include "bezier_hair_intersector.h"
-#include "bezier_ribbon_intersector.h"
-#include "bezier_curve_intersector.h"
-#include "oriented_curve_intersector.h"
-#include "../bvh/node_intersector1.h"
-
-// FIXME: this file seems replicate of curve_intersector_virtual.h
-
-namespace embree
-{
- namespace isa
- {
- struct VirtualCurveIntersector1
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<1>(&pre,&ray,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<1>(&pre,&ray,context,prim);
- }
- };
-
- template<int K>
- struct VirtualCurveIntersectorK
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- size_t mask = movemask(valid_i);
- while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- vbool<K> valid_o = false;
- size_t mask = movemask(valid_i);
- while (mask) {
- size_t k = bscf(mask);
- if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim))
- set(valid_o, k);
- }
- return valid_o;
- }
-
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<K>(&pre,&ray,k,context,prim);
- }
-
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<K>(&pre,&ray,k,context,prim);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h
deleted file mode 100644
index 343cc8ff28..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename NativeCurve3fa, int M>
- struct DistanceCurveHit
- {
- __forceinline DistanceCurveHit() {}
-
- __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
- const NativeCurve3fa& curve3D)
- : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}
-
- __forceinline void finalize()
- {
- vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N));
- vv = V;
- vt = T;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const {
- return curve3D.eval_du(vu[i]);
- }
-
- public:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- int i, N;
- NativeCurve3fa curve3D;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- };
-
- template<typename NativeCurve3fa>
- struct DistanceCurve1Intersector1
- {
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculations1& pre,Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate;
-
- /* transform control points into ray space */
- const NativeCurve3fa curve3Di(v0,v1,v2,v3);
- const NativeCurve3fa curve3D = enlargeRadiusToMinWidth(context,geom,ray.org,curve3Di);
- const NativeCurve3fa curve2D = curve3D.xfm_pr(pre.ray_space,ray.org);
-
- /* evaluate the bezier curve */
- vboolx valid = vfloatx(step) < vfloatx(float(N));
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N);
-
- /* approximative intersection with cone */
- const Vec4vfx v = p1-p0;
- const Vec4vfx w = -p0;
- const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
- const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
- const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
- const Vec4vfx p = madd(u,v,p0);
- const vfloatx t = p.z*pre.depth_scale;
- const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
- const vfloatx r = p.w;
- const vfloatx r2 = r*r;
- valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections
-
- /* update hit information */
- bool ishit = false;
- if (unlikely(any(valid))) {
- DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,0,N,curve3D);
- ishit = ishit | epilog(valid,hit);
- }
-
- if (unlikely(VSIZEX < N))
- {
- /* process SIMD-size many segments per iteration */
- for (int i=VSIZEX; i<N; i+=VSIZEX)
- {
- /* evaluate the bezier curve */
- vboolx valid = vintx(i)+vintx(step) < vintx(N);
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
-
- /* approximative intersection with cone */
- const Vec4vfx v = p1-p0;
- const Vec4vfx w = -p0;
- const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
- const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
- const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
- const Vec4vfx p = madd(u,v,p0);
- const vfloatx t = p.z*pre.depth_scale;
- const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
- const vfloatx r = p.w;
- const vfloatx r2 = r*r;
- valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections
-
- /* update hit information */
- if (unlikely(any(valid))) {
- DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,curve3D);
- ishit = ishit | epilog(valid,hit);
- }
- }
- }
- return ishit;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h
deleted file mode 100644
index 47531027fc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-#include "curve_intersector_sweep.h"
-#include "../subdiv/linear_bezier_patch.h"
-
-#define DBG(x)
-
-namespace embree
-{
- namespace isa
- {
- template<typename Ray, typename Epilog>
- struct TensorLinearCubicBezierSurfaceIntersector
- {
- const LinearSpace3fa& ray_space;
- Ray& ray;
- TensorLinearCubicBezierSurface3fa curve3d;
- TensorLinearCubicBezierSurface2fa curve2d;
- float eps;
- const Epilog& epilog;
- bool isHit;
-
- __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog)
- : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false)
- {
- const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org);
- curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R));
- const BBox2fa b2 = curve2d.bounds();
- eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper)));
- }
-
- __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1)
- {
- if (p1 == p0) {
- if (p0 == 0.0f) return Interval1f(u0,u1);
- else return Interval1f(empty);
- }
- const float t = -p0/(p1-p0);
- const float tt = lerp(u0,u1,t);
- return Interval1f(tt);
- }
-
- __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u)
- {
- if (sign(p0.lower) != sign(p0.upper)) u.extend(u0);
- if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower));
- if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper));
- if (sign(p1.lower) != sign(p1.upper)) u.extend(u1);
- }
-
- __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve)
- {
- Interval1f u = empty;
- solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u);
- solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u);
- solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u);
- solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u);
- solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u);
- solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u);
- return intersect(u,Interval1f(0.0f,1.0f));
- }
-
- __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve)
- {
- Interval1f v = empty;
- solve_linear(0.0f,1.0f,curve.v0,curve.v1,v);
- return intersect(v,Interval1f(0.0f,1.0f));
- }
-
- __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2)
- {
- BBox2fa bounds = curve2.bounds();
- if (bounds.upper.x < 0.0f) return;
- if (bounds.upper.y < 0.0f) return;
- if (bounds.lower.x > 0.0f) return;
- if (bounds.lower.y > 0.0f) return;
-
- if (max(cu.size(),cv.size()) < 1E-4f)
- {
- const float u = cu.center();
- const float v = cv.center();
- TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
- const float t = curve_z.eval(u,v);
- if (ray.tnear() <= t && t <= ray.tfar) {
- const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
- BezierCurveHit hit(t,u,v,Ng);
- isHit |= epilog(hit);
- }
- return;
- }
-
- const Vec2fa dv = curve2.axis_v();
- const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (!curve0v.hasRoot()) return;
-
- const Interval1f v = bezier_clipping(curve0v);
- if (isEmpty(v)) return;
- TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v);
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
-
- const Vec2fa du = curve2.axis_u();
- const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du);
- CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v();
- int roots = curve0u.maxRoots();
- if (roots == 0) return;
-
- if (roots == 1)
- {
- const Interval1f u = bezier_clipping(curve0u);
- if (isEmpty(u)) return;
- TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u);
- cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper));
- solve_bezier_clipping(cu,cv,curve2b);
- return;
- }
-
- TensorLinearCubicBezierSurface2fa curve2l, curve2r;
- curve2a.split_u(curve2l,curve2r);
- solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l);
- solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r);
- }
-
- __forceinline bool solve_bezier_clipping()
- {
- solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d);
- return isHit;
- }
-
- __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv)
- {
- Vec2fa uv(cu.center(),cv.center());
- const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y);
- const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y);
- const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
- solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J);
- }
-
- __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J)
- {
- Vec2fa uv = uv_in;
-
- for (size_t i=0; i<200; i++)
- {
- const Vec2fa f = curve2d.eval(uv.x,uv.y);
- const Vec2fa duv = rcp_J*f;
- uv -= duv;
-
- if (max(abs(f.x),abs(f.y)) < eps)
- {
- const float u = uv.x;
- const float v = uv.y;
- if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs
- if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs
- const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
- const float t = curve_z.eval(u,v);
- if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs
- const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
- BezierCurveHit hit(t,u,v,Ng);
- isHit |= epilog(hit);
- return;
- }
- }
- }
-
- __forceinline bool clip_v(BBox1f& cu, BBox1f& cv)
- {
- const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower);
- const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (!curve0v.hasRoot()) return false;
- Interval1f v = bezier_clipping(curve0v);
- if (isEmpty(v)) return false;
- v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
- return true;
- }
-
- __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv)
- {
- /* perform bezier clipping in v-direction to get tight v-bounds */
- TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv);
- const Vec2fa dv = curve2.axis_v();
- const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (unlikely(!curve0v.hasRoot())) return true;
- Interval1f v = bezier_clipping(curve0v);
- if (unlikely(isEmpty(v))) return true;
- v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
- curve2 = curve2.clip_v(v);
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
-
- /* perform one newton raphson iteration */
- Vec2fa c(cu.center(),cv.center());
- Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv);
- const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
- const Vec2fa c1 = c - rcp_J*f;
-
- /* calculate bounds of derivatives */
- const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds();
- const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds();
-
- /* calculate krawczyk test */
- LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f),
- Interval1f(0.0f), Interval1f(1.0f));
-
- LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x),
- Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y));
-
- const LinearSpace2<Vec2f> rcp_J2(rcp_J);
- const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2);
-
- const Vec2<Interval1f> x(cu,cv);
- const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c)));
-
- /* test if there is no solution */
- const Vec2<Interval1f> KK = intersect(K,x);
- if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true;
-
- /* exit if convergence cannot get proven, but terminate if we are very small */
- if (unlikely(!subset(K,x) && !very_small)) return false;
-
- /* solve using newton raphson iteration of convergence is guarenteed */
- solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J);
- return true;
- }
-
- __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv)
- {
- if (!clip_v(cu,cv)) return;
- return solve_newton_raphson(cu,cv);
- }
-
- __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv)
- {
- unsigned int sptr = 0;
- const unsigned int stack_size = 4;
- unsigned int mask_stack[stack_size];
- BBox1f cu_stack[stack_size];
- BBox1f cv_stack[stack_size];
- goto entry;
-
- /* terminate if stack is empty */
- while (sptr)
- {
- /* pop from stack */
- {
- sptr--;
- size_t mask = mask_stack[sptr];
- cu = cu_stack[sptr];
- cv = cv_stack[sptr];
- const size_t i = bscf(mask);
- mask_stack[sptr] = mask;
- if (mask) sptr++; // there are still items on the stack
-
- /* process next element recurse into each hit curve segment */
- const float u0 = float(i+0)*(1.0f/(VSIZEX-1));
- const float u1 = float(i+1)*(1.0f/(VSIZEX-1));
- const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1));
- cu = cui;
- }
-
-#if 0
- solve_newton_raphson_no_recursion(cu,cv);
- continue;
-
-#else
- /* we assume convergence for small u ranges and verify using krawczyk */
- if (cu.size() < 1.0f/6.0f) {
- const bool very_small = cu.size() < 0.001f || sptr >= stack_size;
- if (solve_krawczyk(very_small,cu,cv)) {
- continue;
- }
- }
-#endif
-
- entry:
-
- /* split the curve into VSIZEX-1 segments in u-direction */
- vboolx valid = true;
- TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(cv).vsplit_u(valid,cu);
-
- /* slabs test in u-direction */
- Vec2vfx ndv = cross(subcurves.axis_v());
- BBox<vfloatx> boundsv = subcurves.vxfm(ndv).bounds();
- valid &= boundsv.lower <= eps;
- valid &= boundsv.upper >= -eps;
- if (none(valid)) continue;
-
- /* slabs test in v-direction */
- Vec2vfx ndu = cross(subcurves.axis_u());
- BBox<vfloatx> boundsu = subcurves.vxfm(ndu).bounds();
- valid &= boundsu.lower <= eps;
- valid &= boundsu.upper >= -eps;
- if (none(valid)) continue;
-
- /* push valid segments to stack */
- assert(sptr < stack_size);
- mask_stack [sptr] = movemask(valid);
- cu_stack [sptr] = cu;
- cv_stack [sptr] = cv;
- sptr++;
- }
- }
-
- __forceinline bool solve_newton_raphson_main()
- {
- BBox1f vu(0.0f,1.0f);
- BBox1f vv(0.0f,1.0f);
- solve_newton_raphson_recursion(vu,vv);
- return isHit;
- }
- };
-
-
- template<template<typename Ty> class SourceCurve>
- struct OrientedCurve1Intersector1
- {
- //template<typename Ty> using Curve = SourceCurve<Ty>;
- typedef SourceCurve<Vec3ff> SourceCurve3ff;
- typedef SourceCurve<Vec3fa> SourceCurve3fa;
-
- __forceinline OrientedCurve1Intersector1() {}
-
- __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
- const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
- const Epilog& epilog) const
- {
- STAT3(normal.trav_prims,1,1,1);
-
- SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
- SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
- ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
- TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
- }
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const
- {
- STAT3(normal.trav_prims,1,1,1);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
- }
- };
-
- template<template<typename Ty> class SourceCurve, int K>
- struct OrientedCurve1IntersectorK
- {
- //template<typename Ty> using Curve = SourceCurve<Ty>;
- typedef SourceCurve<Vec3ff> SourceCurve3ff;
- typedef SourceCurve<Vec3fa> SourceCurve3fa;
-
- struct Ray1
- {
- __forceinline Ray1(RayK<K>& ray, size_t k)
- : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
-
- Vec3fa org;
- Vec3fa dir;
- float _tnear;
- float& tfar;
-
- __forceinline float& tnear() { return _tnear; }
- //__forceinline float& tfar() { return _tfar; }
- __forceinline const float& tnear() const { return _tnear; }
- //__forceinline const float& tfar() const { return _tfar; }
- };
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
- const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
- SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
- SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
- ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
- TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
- }
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const TensorLinearCubicBezierSurface3fa& curve,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h
deleted file mode 100644
index 6e9fc91925..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/geometry.h"
-
-namespace embree
-{
- namespace isa
- {
- struct CurvePrecalculations1
- {
- float depth_scale;
- LinearSpace3fa ray_space;
-
- __forceinline CurvePrecalculations1() {}
-
- __forceinline CurvePrecalculations1(const Ray& ray, const void* ptr)
- {
- depth_scale = rsqrt(dot(ray.dir,ray.dir));
- LinearSpace3fa space = frame(depth_scale*ray.dir);
- space.vz *= depth_scale;
- ray_space = space.transposed();
- }
- };
-
- template<int K>
- struct CurvePrecalculationsK
- {
- vfloat<K> depth_scale;
- LinearSpace3fa ray_space[K];
-
- __forceinline CurvePrecalculationsK(const vbool<K>& valid, const RayK<K>& ray)
- {
- size_t mask = movemask(valid);
- depth_scale = rsqrt(dot(ray.dir,ray.dir));
- while (mask) {
- size_t k = bscf(mask);
- Vec3fa ray_dir_k = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- LinearSpace3fa ray_space_k = frame(depth_scale[k]*ray_dir_k);
- ray_space_k.vz *= depth_scale[k];
- ray_space[k] = ray_space_k.transposed();
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h
deleted file mode 100644
index a99cf99d56..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "quad_intersector.h"
-#include "curve_intersector_precalculations.h"
-
-#define Bezier1Intersector1 RibbonCurve1Intersector1
-#define Bezier1IntersectorK RibbonCurve1IntersectorK
-
-namespace embree
-{
- namespace isa
- {
- template<typename NativeCurve3ff, int M>
- struct RibbonHit
- {
- __forceinline RibbonHit() {}
-
- __forceinline RibbonHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
- const NativeCurve3ff& curve3D)
- : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}
-
- __forceinline void finalize()
- {
- vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N));
- vv = V;
- vt = T;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const {
- return curve3D.eval_du(vu[i]);
- }
-
- public:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- int i, N;
- NativeCurve3ff curve3D;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- };
-
- /* calculate squared distance of point p0 to line p1->p2 */
- __forceinline std::pair<vfloatx,vfloatx> sqr_point_line_distance(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2)
- {
- const vfloatx num = det(p2-p1,p1-p0);
- const vfloatx den2 = dot(p2-p1,p2-p1);
- return std::make_pair(num*num,den2);
- }
-
- /* performs culling against a cylinder */
- __forceinline vboolx cylinder_culling_test(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2, const vfloatx& r)
- {
- const std::pair<vfloatx,vfloatx> d = sqr_point_line_distance(p0,p1,p2);
- return d.first <= r*r*d.second;
- }
-
- template<typename NativeCurve3ff, typename Epilog>
- __forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar,
- const LinearSpace3fa& ray_space, const float& depth_scale,
- const NativeCurve3ff& curve3D, const int N,
- const Epilog& epilog)
- {
- /* transform control points into ray space */
- const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org);
- float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3)));
-
- /* evaluate the bezier curve */
- bool ishit = false;
- vboolx valid = vfloatx(step) < vfloatx(float(N));
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N);
- valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w));
-
- if (any(valid))
- {
- Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(0,N);
- Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(0,N);
- dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt);
- dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt);
- const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f);
- const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f);
- const Vec3vfx nn0 = normalize(n0);
- const Vec3vfx nn1 = normalize(n1);
- const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1));
- const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1));
-
- vfloatx vu,vv,vt;
- vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt);
-
- if (any(valid0))
- {
- /* ignore self intersections */
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
- vfloatx r = lerp(p0.w, p1.w, vu);
- valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale;
- }
-
- if (any(valid0))
- {
- vv = madd(2.0f,vv,vfloatx(-1.0f));
- RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,0,N,curve3D);
- ishit |= epilog(bhit.valid,bhit);
- }
- }
- }
-
- if (unlikely(VSIZEX < N))
- {
- /* process SIMD-size many segments per iteration */
- for (int i=VSIZEX; i<N; i+=VSIZEX)
- {
- /* evaluate the bezier curve */
- vboolx valid = vintx(i)+vintx(step) < vintx(N);
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
- valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w));
- if (none(valid)) continue;
-
- Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(i,N);
- Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(i,N);
- dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt);
- dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt);
- const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f);
- const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f);
- const Vec3vfx nn0 = normalize(n0);
- const Vec3vfx nn1 = normalize(n1);
- const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1));
- const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1));
-
- vfloatx vu,vv,vt;
- vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt);
-
- if (any(valid0))
- {
- /* ignore self intersections */
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
- vfloatx r = lerp(p0.w, p1.w, vu);
- valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale;
- }
-
- if (any(valid0))
- {
- vv = madd(2.0f,vv,vfloatx(-1.0f));
- RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,i,N,curve3D);
- ishit |= epilog(bhit.valid,bhit);
- }
- }
- }
- }
- return ishit;
- }
-
- template<template<typename Ty> class NativeCurve>
- struct RibbonCurve1Intersector1
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate;
- NativeCurve3ff curve(v0,v1,v2,v3);
- curve = enlargeRadiusToMinWidth(context,geom,ray.org,curve);
- return intersect_ribbon<NativeCurve3ff>(ray.org,ray.dir,ray.tnear(),ray.tfar,
- pre.ray_space,pre.depth_scale,
- curve,N,
- epilog);
- }
- };
-
- template<template<typename Ty> class NativeCurve, int K>
- struct RibbonCurve1IntersectorK
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& ray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate;
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- NativeCurve3ff curve(v0,v1,v2,v3);
- curve = enlargeRadiusToMinWidth(context,geom,ray_org,curve);
- return intersect_ribbon<NativeCurve3ff>(ray_org,ray_dir,ray.tnear()[k],ray.tfar[k],
- pre.ray_space[k],pre.depth_scale[k],
- curve,N,
- epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h
deleted file mode 100644
index 883cedc3d2..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "cylinder.h"
-#include "plane.h"
-#include "line_intersector.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- static const size_t numJacobianIterations = 5;
-#if defined(__AVX__)
- static const size_t numBezierSubdivisions = 2;
-#else
- static const size_t numBezierSubdivisions = 3;
-#endif
-
- struct BezierCurveHit
- {
- __forceinline BezierCurveHit() {}
-
- __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
- : t(t), u(u), v(0.0f), Ng(Ng) {}
-
- __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
- : t(t), u(u), v(v), Ng(Ng) {}
-
- __forceinline void finalize() {}
-
- public:
- float t;
- float u;
- float v;
- Vec3fa Ng;
- };
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog>
- __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
- const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
- const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
- const Epilog& epilog)
- {
- if (tp.lower[i]+dt > ray.tfar) return false;
- Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
- if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
- if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
- BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
- return epilog(hit);
- }
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog>
- __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
- {
- const Vec3fa org = zero;
- const Vec3fa dir = ray.dir;
- const float length_ray_dir = length(dir);
-
- /* error of curve evaluations is propertional to largest coordinate */
- const BBox3ff box = curve.bounds();
- const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
-
- for (size_t i=0; i<numJacobianIterations; i++)
- {
- const Vec3fa Q = madd(Vec3fa(t),dir,org);
- //const Vec3fa dQdu = zero;
- const Vec3fa dQdt = dir;
- const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
-
- Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
- //const Vec3fa dPdt = zero;
-
- const Vec3fa R = Q-P;
- const float len_R = length(R); //reduce_max(abs(R));
- const float R_err = max(Q_err,P_err);
- const Vec3fa dRdu = /*dQdu*/-dPdu;
- const Vec3fa dRdt = dQdt;//-dPdt;
-
- const Vec3fa T = normalize(dPdu);
- const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
- //const Vec3fa dTdt = zero;
- const float cos_err = P_err/length(dPdu);
-
- /* Error estimate for dot(R,T):
-
- dot(R,T) = cos(R,T) |R| |T|
- = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
- = cos(R,T)*|R|*|T|
- +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
- +- cos_error*(|R| + |T|)
- +- lower order terms
- with cos(R,T) being in [0,1] and |T| = 1 we get:
- dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
- */
-
- const float f = dot(R,T);
- const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
- const float dfdu = dot(dRdu,T) + dot(R,dTdu);
- const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
-
- const float K = dot(R,R)-sqr(f);
- const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
- const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
- const float rsqrt_K = rsqrt(K);
-
- const float g = sqrt(K)-P.w;
- const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
- const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
- const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
-
- const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
- const Vec2f dut = rcp(J)*Vec2f(f,g);
- const Vec2f ut = Vec2f(u,t) - dut;
- u = ut.x; t = ut.y;
-
- if (abs(f) < f_err && abs(g) < g_err)
- {
- t+=dt;
- if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
- if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
- const Vec3fa R = normalize(Q-P);
- const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
- const Vec3fa V = cross(dPdu,R);
- BezierCurveHit hit(t,u,cross(V,U));
- return epilog(hit);
- }
- }
- return false;
- }
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog>
- bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
- float u0, float u1, unsigned int depth, const Epilog& epilog)
- {
-#if defined(__AVX__)
- typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
- typedef vint8 vintx;
- typedef vfloat8 vfloatx;
-#else
- typedef vbool4 vboolx;
- typedef vint4 vintx;
- typedef vfloat4 vfloatx;
-#endif
- typedef Vec3<vfloatx> Vec3vfx;
- typedef Vec4<vfloatx> Vec4vfx;
-
- unsigned int maxDepth = numBezierSubdivisions;
- bool found = false;
- const Vec3fa org = zero;
- const Vec3fa dir = ray.dir;
-
- unsigned int sptr = 0;
- const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
- struct StackEntry {
- vboolx valid;
- vfloatx tlower;
- float u0;
- float u1;
- unsigned int depth;
- };
- StackEntry stack[stack_size];
- goto entry;
-
- /* terminate if stack is empty */
- while (sptr)
- {
- /* pop from stack */
- {
- sptr--;
- vboolx valid = stack[sptr].valid;
- const vfloatx tlower = stack[sptr].tlower;
- valid &= tlower+dt <= ray.tfar;
- if (none(valid)) continue;
- u0 = stack[sptr].u0;
- u1 = stack[sptr].u1;
- depth = stack[sptr].depth;
- const size_t i = select_min(valid,tlower); clear(valid,i);
- stack[sptr].valid = valid;
- if (any(valid)) sptr++; // there are still items on the stack
-
- /* process next segment */
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
- u0 = vu0[i+0];
- u1 = vu0[i+1];
- }
- entry:
-
- /* subdivide curve */
- const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
- Vec4vfx P0, dP0du; curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
- const Vec4vfx P3 = shift_right_1(P0);
- const Vec4vfx dP3du = shift_right_1(dP0du);
- const Vec4vfx P1 = P0 + dP0du;
- const Vec4vfx P2 = P3 - dP3du;
-
- /* calculate bounding cylinders */
- const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
- const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
- const vfloatx maxr12 = sqrt(max(rr1,rr2));
- const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);
- const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
- vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
- vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
- r_outer = one_plus_ulp*r_outer;
- r_inner = max(0.0f,one_minus_ulp*r_inner);
- const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
- const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
- vboolx valid = true; clear(valid,vfloatx::size-1);
-
- /* intersect with outer cylinder */
- BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
- valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
- if (none(valid)) continue;
-
- /* intersect with cap-planes */
- BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
- tp = embree::intersect(tp,tc_outer);
- BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
- tp = embree::intersect(tp,h0);
- BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
- tp = embree::intersect(tp,h1);
- valid &= tp.lower <= tp.upper;
- if (none(valid)) continue;
-
- /* clamp and correct u parameter */
- u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
- u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
- u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
- u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
-
- /* intersect with inner cylinder */
- BBox<vfloatx> tc_inner;
- vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
- const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
-
- /* at the unstable area we subdivide deeper */
- const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);
- const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
-
- /* subtract the inner interval from the current hit interval */
- BBox<vfloatx> tp0, tp1;
- subtract(tp,tc_inner,tp0,tp1);
- vboolx valid0 = valid & (tp0.lower <= tp0.upper);
- vboolx valid1 = valid & (tp1.lower <= tp1.upper);
- if (none(valid0 | valid1)) continue;
-
- /* iterate over all first hits front to back */
- const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));
- vboolx recursion_valid0 = valid0 & (depth < termDepth0);
- valid0 &= depth >= termDepth0;
-
- while (any(valid0))
- {
- const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
- found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
- //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
- valid0 &= tp0.lower+dt <= ray.tfar;
- }
- valid1 &= tp1.lower+dt <= ray.tfar;
-
- /* iterate over all second hits front to back */
- const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
- vboolx recursion_valid1 = valid1 & (depth < termDepth1);
- valid1 &= depth >= termDepth1;
- while (any(valid1))
- {
- const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
- found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
- //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
- valid1 &= tp1.lower+dt <= ray.tfar;
- }
-
- /* push valid segments to stack */
- recursion_valid0 &= tp0.lower+dt <= ray.tfar;
- recursion_valid1 &= tp1.lower+dt <= ray.tfar;
- const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
- if (any(recursion_valid))
- {
- assert(sptr < stack_size);
- stack[sptr].valid = recursion_valid;
- stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
- stack[sptr].u0 = u0;
- stack[sptr].u1 = u1;
- stack[sptr].depth = depth+1;
- sptr++;
- }
- }
- return found;
- }
-
- template<template<typename Ty> class NativeCurve>
- struct SweepCurve1Intersector1
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
-
- /* move ray closer to make intersection stable */
- NativeCurve3ff curve0(v0,v1,v2,v3);
- curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
- const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
- const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
- const NativeCurve3ff curve1 = curve0-ref;
- return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
- }
- };
-
- template<template<typename Ty> class NativeCurve, int K>
- struct SweepCurve1IntersectorK
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- struct Ray1
- {
- __forceinline Ray1(RayK<K>& ray, size_t k)
- : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
-
- Vec3fa org;
- Vec3fa dir;
- float _tnear;
- float& tfar;
-
- __forceinline float& tnear() { return _tnear; }
- //__forceinline float& tfar() { return _tfar; }
- __forceinline const float& tnear() const { return _tnear; }
- //__forceinline const float& tfar() const { return _tfar; }
-
- };
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
-
- /* move ray closer to make intersection stable */
- NativeCurve3ff curve0(v0,v1,v2,v3);
- curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
- const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
- const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
- const NativeCurve3ff curve1 = curve0-ref;
- return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h
deleted file mode 100644
index e1f4238130..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h
+++ /dev/null
@@ -1,671 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../subdiv/bezier_curve.h"
-#include "../common/primref.h"
-#include "curve_intersector_precalculations.h"
-#include "../bvh/node_intersector1.h"
-#include "../bvh/node_intersector_packet.h"
-
-#include "intersector_epilog.h"
-
-#include "../subdiv/bezier_curve.h"
-#include "../subdiv/bspline_curve.h"
-#include "../subdiv/hermite_curve.h"
-#include "../subdiv/catmullrom_curve.h"
-
-#include "spherei_intersector.h"
-#include "disci_intersector.h"
-
-#include "linei_intersector.h"
-#include "roundlinei_intersector.h"
-#include "conelinei_intersector.h"
-
-#include "curveNi_intersector.h"
-#include "curveNv_intersector.h"
-#include "curveNi_mb_intersector.h"
-
-#include "curve_intersector_distance.h"
-#include "curve_intersector_ribbon.h"
-#include "curve_intersector_oriented.h"
-#include "curve_intersector_sweep.h"
-
-namespace embree
-{
- struct VirtualCurveIntersector
- {
- typedef void (*Intersect1Ty)(void* pre, void* ray, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded1Ty )(void* pre, void* ray, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect4Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded4Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect8Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded8Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect16Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded16Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- public:
- struct Intersectors
- {
- Intersectors() {} // WARNING: Do not zero initialize this, as we otherwise get problems with thread unsafe local static variable initialization (e.g. on VS2013) in curve_intersector_virtual.cpp.
-
- template<int K> void intersect(void* pre, void* ray, IntersectContext* context, const void* primitive);
- template<int K> bool occluded (void* pre, void* ray, IntersectContext* context, const void* primitive);
-
- template<int K> void intersect(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- template<int K> bool occluded (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- public:
- Intersect1Ty intersect1;
- Occluded1Ty occluded1;
- Intersect4Ty intersect4;
- Occluded4Ty occluded4;
- Intersect8Ty intersect8;
- Occluded8Ty occluded8;
- Intersect16Ty intersect16;
- Occluded16Ty occluded16;
- };
-
- Intersectors vtbl[Geometry::GTY_END];
- };
-
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(intersect1); intersect1(pre,ray,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(occluded1); return occluded1(pre,ray,context,primitive); }
-
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<4>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect4); intersect4(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<4> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded4); return occluded4(pre,ray,k,context,primitive); }
-
-#if defined(__AVX__)
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<8>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect8); intersect8(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<8> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded8); return occluded8(pre,ray,k,context,primitive); }
-#endif
-
-#if defined(__AVX512F__)
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<16>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect16); intersect16(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<16> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded16); return occluded16(pre,ray,k,context,primitive); }
-#endif
-
- namespace isa
- {
- struct VirtualCurveIntersector1
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<1>(&pre,&ray,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<1>(&pre,&ray,context,prim);
- }
- };
-
- template<int K>
- struct VirtualCurveIntersectorK
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- size_t mask = movemask(valid_i);
- while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim);
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- vbool<K> valid_o = false;
- size_t mask = movemask(valid_i);
- while (mask) {
- size_t k = bscf(mask);
- if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim))
- set(valid_o, k);
- }
- return valid_o;
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<K>(&pre,&ray,k,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<K>(&pre,&ray,k,context,prim);
- }
- };
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors SphereNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors DiscNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNvIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNvIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiMBIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNvIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNvIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors OrientedCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors OrientedCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteRibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteRibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h
deleted file mode 100644
index 69cf612275..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h
deleted file mode 100644
index d37e41098e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h
deleted file mode 100644
index a133a11d63..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h
deleted file mode 100644
index 9aec35da45..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h
deleted file mode 100644
index dd37d194f5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h
deleted file mode 100644
index fe5ceed840..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim);
-
-#if defined (__AVX__)
- void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h b/thirdparty/embree-aarch64/kernels/geometry/cylinder.h
deleted file mode 100644
index 39a582864c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h
+++ /dev/null
@@ -1,223 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- struct Cylinder
- {
- const Vec3fa p0; //!< start location
- const Vec3fa p1; //!< end position
- const float rr; //!< squared radius of cylinder
-
- __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float r)
- : p0(p0), p1(p1), rr(sqr(r)) {}
-
- __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float rr, bool)
- : p0(p0), p1(p1), rr(rr) {}
-
- __forceinline bool intersect(const Vec3fa& org,
- const Vec3fa& dir,
- BBox1f& t_o,
- float& u0_o, Vec3fa& Ng0_o,
- float& u1_o, Vec3fa& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const float rl = rcp_length(p1-p0);
- const Vec3fa P0 = p0, dP = (p1-p0)*rl;
- const Vec3fa O = org-P0, dO = dir;
-
- const float dOdO = dot(dO,dO);
- const float OdO = dot(dO,O);
- const float OO = dot(O,O);
- const float dOz = dot(dP,dO);
- const float Oz = dot(dP,O);
-
- const float A = dOdO - sqr(dOz);
- const float B = 2.0f * (OdO - dOz*Oz);
- const float C = OO - sqr(Oz) - rr;
-
- /* we miss the cylinder if determinant is smaller than zero */
- const float D = B*B - 4.0f*A*C;
- if (D < 0.0f) {
- t_o = BBox1f(pos_inf,neg_inf);
- return false;
- }
-
- /* special case for rays that are parallel to the cylinder */
- const float eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- if (abs(A) < eps)
- {
- if (C <= 0.0f) {
- t_o = BBox1f(neg_inf,pos_inf);
- return true;
- } else {
- t_o = BBox1f(pos_inf,neg_inf);
- return false;
- }
- }
-
- /* standard case for rays that are not parallel to the cylinder */
- const float Q = sqrt(D);
- const float rcp_2A = rcp(2.0f*A);
- const float t0 = (-B-Q)*rcp_2A;
- const float t1 = (-B+Q)*rcp_2A;
-
- /* calculates u and Ng for near hit */
- {
- u0_o = madd(t0,dOz,Oz)*rl;
- const Vec3fa Pr = t0*dir;
- const Vec3fa Pl = madd(u0_o,p1-p0,p0);
- Ng0_o = Pr-Pl;
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = madd(t1,dOz,Oz)*rl;
- const Vec3fa Pr = t1*dir;
- const Vec3fa Pl = madd(u1_o,p1-p0,p0);
- Ng1_o = Pr-Pl;
- }
-
- t_o.lower = t0;
- t_o.upper = t1;
- return true;
- }
-
- __forceinline bool intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox1f& t_o) const
- {
- float u0_o; Vec3fa Ng0_o;
- float u1_o; Vec3fa Ng1_o;
- return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
-
- static bool verify(const size_t id, const Cylinder& cylinder, const RayHit& ray, bool shouldhit, const float t0, const float t1)
- {
- float eps = 0.001f;
- BBox1f t; bool hit;
- hit = cylinder.intersect(ray.org,ray.dir,t);
-
- bool failed = hit != shouldhit;
- if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : abs(t0-t.lower) > eps;
- if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : abs(t1-t.upper) > eps;
- if (!failed) return true;
- embree_cout << "Cylinder test " << id << " failed: cylinder = " << cylinder << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
- return false;
- }
-
- /* verify cylinder class */
- static bool verify()
- {
- bool passed = true;
- const Cylinder cylinder(Vec3fa(0.0f,0.0f,0.0f),Vec3fa(1.0f,0.0f,0.0f),1.0f);
- passed &= verify(0,cylinder,RayHit(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(1,cylinder,RayHit(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(2,cylinder,RayHit(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
- passed &= verify(3,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(4,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(5,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- passed &= verify(6,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- return passed;
- }
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cylinder& c) {
- return cout << "Cylinder { p0 = " << c.p0 << ", p1 = " << c.p1 << ", r = " << sqrtf(c.rr) << "}";
- }
- };
-
- template<int N>
- struct CylinderN
- {
- const Vec3vf<N> p0; //!< start location
- const Vec3vf<N> p1; //!< end position
- const vfloat<N> rr; //!< squared radius of cylinder
-
- __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& r)
- : p0(p0), p1(p1), rr(sqr(r)) {}
-
- __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& rr, bool)
- : p0(p0), p1(p1), rr(rr) {}
-
-
- __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
- BBox<vfloat<N>>& t_o,
- vfloat<N>& u0_o, Vec3vf<N>& Ng0_o,
- vfloat<N>& u1_o, Vec3vf<N>& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const vfloat<N> rl = rcp_length(p1-p0);
- const Vec3vf<N> P0 = p0, dP = (p1-p0)*rl;
- const Vec3vf<N> O = Vec3vf<N>(org)-P0, dO = dir;
-
- const vfloat<N> dOdO = dot(dO,dO);
- const vfloat<N> OdO = dot(dO,O);
- const vfloat<N> OO = dot(O,O);
- const vfloat<N> dOz = dot(dP,dO);
- const vfloat<N> Oz = dot(dP,O);
-
- const vfloat<N> A = dOdO - sqr(dOz);
- const vfloat<N> B = 2.0f * (OdO - dOz*Oz);
- const vfloat<N> C = OO - sqr(Oz) - rr;
-
- /* we miss the cylinder if determinant is smaller than zero */
- const vfloat<N> D = B*B - 4.0f*A*C;
- vbool<N> valid = D >= 0.0f;
- if (none(valid)) {
- t_o = BBox<vfloat<N>>(empty);
- return valid;
- }
-
- /* standard case for rays that are not parallel to the cylinder */
- const vfloat<N> Q = sqrt(D);
- const vfloat<N> rcp_2A = rcp(2.0f*A);
- const vfloat<N> t0 = (-B-Q)*rcp_2A;
- const vfloat<N> t1 = (-B+Q)*rcp_2A;
-
- /* calculates u and Ng for near hit */
- {
- u0_o = madd(t0,dOz,Oz)*rl;
- const Vec3vf<N> Pr = t0*Vec3vf<N>(dir);
- const Vec3vf<N> Pl = madd(u0_o,p1-p0,p0);
- Ng0_o = Pr-Pl;
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = madd(t1,dOz,Oz)*rl;
- const Vec3vf<N> Pr = t1*Vec3vf<N>(dir);
- const Vec3vf<N> Pl = madd(u1_o,p1-p0,p0);
- Ng1_o = Pr-Pl;
- }
-
- t_o.lower = select(valid, t0, vfloat<N>(pos_inf));
- t_o.upper = select(valid, t1, vfloat<N>(neg_inf));
-
- /* special case for rays that are parallel to the cylinder */
- const vfloat<N> eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- vbool<N> validt = valid & (abs(A) < eps);
- if (unlikely(any(validt)))
- {
- vbool<N> inside = C <= 0.0f;
- t_o.lower = select(validt,select(inside,vfloat<N>(neg_inf),vfloat<N>(pos_inf)),t_o.lower);
- t_o.upper = select(validt,select(inside,vfloat<N>(pos_inf),vfloat<N>(neg_inf)),t_o.upper);
- valid &= !validt | inside;
- }
- return valid;
- }
-
- __forceinline vbool<N> intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
- {
- vfloat<N> u0_o; Vec3vf<N> Ng0_o;
- vfloat<N> u1_o; Vec3vf<N> Ng1_o;
- return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h
deleted file mode 100644
index e8305780e5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h
+++ /dev/null
@@ -1,216 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_points.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct DiscIntersectorHitM
- {
- __forceinline DiscIntersectorHitM() {}
-
- __forceinline DiscIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng)
- {
- }
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv(const size_t i) const
- {
- return Vec2f(vu[i], vv[i]);
- }
- __forceinline float t(const size_t i) const
- {
- return vt[i];
- }
- __forceinline Vec3fa Ng(const size_t i) const
- {
- return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]);
- }
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct DiscIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
-
- valid &= (vfloat<M>(ray.tnear()) <= projC0) & (projC0 <= vfloat<M>(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale; // ignore self intersections
- if (unlikely(none(valid)))
- return false;
-
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir);
- return epilog(valid, hit);
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Vec3vf<M>& normal,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- vfloat<M> divisor = dot(Vec3vf<M>((Vec3fa)ray.dir), normal);
- const vbool<M> parallel = divisor == vfloat<M>(0.f);
- valid &= !parallel;
- divisor = select(parallel, 1.f, divisor); // prevent divide by zero
-
- vfloat<M> t = dot(center - Vec3vf<M>((Vec3fa)ray.org), Vec3vf<M>(normal)) / divisor;
-
- valid &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
- if (unlikely(none(valid)))
- return false;
-
- Vec3vf<M> intersection = Vec3vf<M>((Vec3fa)ray.org) + Vec3vf<M>((Vec3fa)ray.dir) * t;
- vfloat<M> dist2 = dot(intersection - center, intersection - center);
- valid &= dist2 < radius * radius;
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, t, normal);
- return epilog(valid, hit);
- }
- };
-
- template<int M, int K>
- struct DiscIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray,
- size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
-
- valid &= (vfloat<M>(ray.tnear()[k]) <= projC0) & (projC0 <= vfloat<M>(ray.tfar[k]));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale[k]; // ignore self intersections
- if (unlikely(none(valid)))
- return false;
-
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir);
- return epilog(valid, hit);
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray,
- size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Vec3vf<M>& normal,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- vfloat<M> divisor = dot(Vec3vf<M>(ray_dir), normal);
- const vbool<M> parallel = divisor == vfloat<M>(0.f);
- valid &= !parallel;
- divisor = select(parallel, 1.f, divisor); // prevent divide by zero
-
- vfloat<M> t = dot(center - Vec3vf<M>(ray_org), Vec3vf<M>(normal)) / divisor;
-
- valid &= (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
- if (unlikely(none(valid)))
- return false;
-
- Vec3vf<M> intersection = Vec3vf<M>(ray_org) + Vec3vf<M>(ray_dir) * t;
- vfloat<M> dist2 = dot(intersection - center, intersection - center);
- valid &= dist2 < radius * radius;
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, t, normal);
- return epilog(valid, hit);
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h
deleted file mode 100644
index e1dc3aa98e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "disc_intersector.h"
-#include "intersector_epilog.h"
-#include "pointi.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct DiscMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct DiscMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct DiscMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct DiscMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct OrientedDiscMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct OrientedDiscMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct OrientedDiscMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct OrientedDiscMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/filter.h b/thirdparty/embree-aarch64/kernels/geometry/filter.h
deleted file mode 100644
index 4cdf7a395a..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/filter.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/geometry.h"
-#include "../common/ray.h"
-#include "../common/hit.h"
-#include "../common/context.h"
-
-namespace embree
-{
- namespace isa
- {
- __forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- if (geometry->intersectionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- copyHitToRay(*(RayHit*)args->ray,*(Hit*)args->hit);
- return true;
- }
-
- __forceinline bool runIntersectionFilter1(const Geometry* const geometry, RayHit& ray, IntersectContext* context, Hit& hit)
- {
- RTCFilterFunctionNArguments args;
- int mask = -1;
- args.valid = &mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = 1;
- return runIntersectionFilter1Helper(&args,geometry,context);
- }
-
- __forceinline void reportIntersection1(IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args)
- {
-#if defined(EMBREE_FILTER_FUNCTION)
- IntersectContext* MAYBE_UNUSED context = args->internal_context;
- const Geometry* const geometry = args->geometry;
- if (geometry->intersectionFilterN) {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(filter_args);
- }
-
- //if (args->valid[0] == 0)
- // return;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(filter_args);
- }
-#endif
- }
-
- __forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- if (geometry->occlusionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
-
- if (args->valid[0] == 0)
- return false;
- }
- return true;
- }
-
- __forceinline bool runOcclusionFilter1(const Geometry* const geometry, Ray& ray, IntersectContext* context, Hit& hit)
- {
- RTCFilterFunctionNArguments args;
- int mask = -1;
- args.valid = &mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = 1;
- return runOcclusionFilter1Helper(&args,geometry,context);
- }
-
- __forceinline void reportOcclusion1(OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args)
- {
-#if defined(EMBREE_FILTER_FUNCTION)
- IntersectContext* MAYBE_UNUSED context = args->internal_context;
- const Geometry* const geometry = args->geometry;
- if (geometry->occlusionFilterN) {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(filter_args);
- }
-
- //if (args->valid[0] == 0)
- // return false;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(filter_args);
- }
-#endif
- }
-
- template<int K>
- __forceinline vbool<K> runIntersectionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- vint<K>* mask = (vint<K>*) args->valid;
- if (geometry->intersectionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(args);
- }
-
- vbool<K> valid_o = *mask != vint<K>(zero);
- if (none(valid_o)) return valid_o;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
- }
-
- valid_o = *mask != vint<K>(zero);
- if (none(valid_o)) return valid_o;
-
- copyHitToRay(valid_o,*(RayHitK<K>*)args->ray,*(HitK<K>*)args->hit);
- return valid_o;
- }
-
- template<int K>
- __forceinline vbool<K> runIntersectionFilter(const vbool<K>& valid, const Geometry* const geometry, RayHitK<K>& ray, IntersectContext* context, HitK<K>& hit)
- {
- RTCFilterFunctionNArguments args;
- vint<K> mask = valid.mask32();
- args.valid = (int*)&mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = K;
- return runIntersectionFilterHelper<K>(&args,geometry,context);
- }
-
- template<int K>
- __forceinline vbool<K> runOcclusionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- vint<K>* mask = (vint<K>*) args->valid;
- if (geometry->occlusionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(args);
- }
-
- vbool<K> valid_o = *mask != vint<K>(zero);
-
- if (none(valid_o)) return valid_o;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
- }
-
- valid_o = *mask != vint<K>(zero);
-
- RayK<K>* ray = (RayK<K>*) args->ray;
- ray->tfar = select(valid_o, vfloat<K>(neg_inf), ray->tfar);
- return valid_o;
- }
-
- template<int K>
- __forceinline vbool<K> runOcclusionFilter(const vbool<K>& valid, const Geometry* const geometry, RayK<K>& ray, IntersectContext* context, HitK<K>& hit)
- {
- RTCFilterFunctionNArguments args;
- vint<K> mask = valid.mask32();
- args.valid = (int*)&mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = K;
- return runOcclusionFilterHelper<K>(&args,geometry,context);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h
deleted file mode 100644
index 46a0af0827..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "grid_soa_intersector1.h"
-#include "grid_soa_intersector_packet.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename T>
- class SubdivPatch1Precalculations : public T
- {
- public:
- __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr)
- : T(ray,ptr) {}
- };
-
- template<int K, typename T>
- class SubdivPatch1PrecalculationsK : public T
- {
- public:
- __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray)
- : T(valid,ray) {}
- };
-
- class Grid1Intersector1
- {
- public:
- typedef GridSOA Primitive;
- typedef Grid1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
- }
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- intersect(pre,ray,context,prim,ty,lazy_node);
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
- }
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- return occluded(pre,ray,context,prim,ty,lazy_node);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) {
- assert(false && "not implemented");
- return false;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- assert(false && "not implemented");
- return false;
- }
- };
-
- template <int K>
- struct GridIntersectorK
- {
- typedef GridSOA Primitive;
- typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;
-
-
- static __forceinline void intersect(const vbool<K>& valid, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
- }
- };
-
- typedef Grid1IntersectorK<4> SubdivPatch1Intersector4;
- typedef Grid1IntersectorK<8> SubdivPatch1Intersector8;
- typedef Grid1IntersectorK<16> SubdivPatch1Intersector16;
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
deleted file mode 100644
index d3b275586c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_subdiv_mesh.h"
-#include "../bvh/bvh.h"
-#include "../subdiv/tessellation.h"
-#include "../subdiv/tessellation_cache.h"
-#include "subdivpatch1.h"
-
-namespace embree
-{
- namespace isa
- {
- class GridSOA
- {
- public:
-
- /*! GridSOA constructor */
- GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
-
- /*! Subgrid creation */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
- unsigned x0, unsigned x1, unsigned y0, unsigned y1,
- const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
- const unsigned width = x1-x0+1;
- const unsigned height = y1-y0+1;
- const GridRange range(0,width-1,0,height-1);
- size_t bvhBytes = 0;
- if (time_steps == 1)
- bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);
- else {
- bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);
- bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));
- }
- const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
- size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
-#if !defined(__X86_64__) && !defined(__aarch64__)
- rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
-#endif
- void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
- assert(data);
- return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);
- }
-
- /*! Grid creation */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
- const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
- return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
- }
-
- /*! returns reference to root */
- __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
- __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
-
- /*! returns pointer to BVH array */
- __forceinline int8_t* bvhData() { return &data[0]; }
- __forceinline const int8_t* bvhData() const { return &data[0]; }
-
- /*! returns pointer to Grid array */
- __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }
- __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
-
- __forceinline void* encodeLeaf(size_t u, size_t v) {
- return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf
- }
- __forceinline float* decodeLeaf(size_t t, const void* ptr) {
- return gridData(t) + (((size_t) (ptr) >> 4) - 1);
- }
-
- /*! returns the size of the BVH over the grid in bytes */
- static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
-
- /*! returns the size of the temporal BVH over the time range BVHs */
- static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
-
- /*! calculates bounding box of grid range */
- __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
- {
- const float* const grid_array = gridData(time);
- const float* const grid_x_array = grid_array + 0 * dim_offset;
- const float* const grid_y_array = grid_array + 1 * dim_offset;
- const float* const grid_z_array = grid_array + 2 * dim_offset;
-
- /* compute the bounds just for the range! */
- BBox3fa bounds( empty );
- for (unsigned v = range.v_start; v<=range.v_end; v++)
- {
- for (unsigned u = range.u_start; u<=range.u_end; u++)
- {
- const float x = grid_x_array[ v * width + u];
- const float y = grid_y_array[ v * width + u];
- const float z = grid_z_array[ v * width + u];
- bounds.extend( Vec3fa(x,y,z) );
- }
- }
- assert(is_finite(bounds));
- return bounds;
- }
-
- /*! Evaluates grid over patch and builds BVH4 tree over the grid. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
-
- /*! Create BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
-
- /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
-
- /*! Create MBlur BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
-
- /*! Create MSMBlur BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
-
- template<typename Loader>
- struct MapUV
- {
- typedef typename Loader::vfloat vfloat;
- const float* const grid_uv;
- size_t line_offset;
- size_t lines;
-
- __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
- : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
-
- __forceinline void operator() (vfloat& u, vfloat& v) const {
- const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
- const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
- const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
- const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
- const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
- u = uv[0];v = uv[1];
- }
- };
-
- struct Gather2x3
- {
- enum { M = 4 };
- typedef vbool4 vbool;
- typedef vint4 vint;
- typedef vfloat4 vfloat;
-
- static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
- {
- vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- if (unlikely(line_offset == 2))
- {
- r0 = shuffle<0,1,1,1>(r0);
- r1 = shuffle<0,1,1,1>(r1);
- }
- return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
- }
-
- static __forceinline void gather(const float* const grid_x,
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf4& v0_o,
- Vec3vf4& v1_o,
- Vec3vf4& v2_o)
- {
- const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
- v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-
-#if defined (__AVX__)
- struct Gather3x3
- {
- enum { M = 8 };
- typedef vbool8 vbool;
- typedef vint8 vint;
- typedef vfloat8 vfloat;
-
- static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
- {
- vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- vfloat4 rc;
- if (likely(lines > 2))
- rc = vfloat4::loadu(grid + 2*line_offset);
- else
- rc = rb;
-
- if (unlikely(line_offset == 2))
- {
- ra = shuffle<0,1,1,1>(ra);
- rb = shuffle<0,1,1,1>(rb);
- rc = shuffle<0,1,1,1>(rc);
- }
-
- const vfloat8 r0 = vfloat8(ra,rb);
- const vfloat8 r1 = vfloat8(rb,rc);
- return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
- }
-
- static __forceinline void gather(const float* const grid_x,
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf8& v0_o,
- Vec3vf8& v1_o,
- Vec3vf8& v2_o)
- {
- const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
- v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-#endif
-
- template<typename vfloat>
- static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
- {
- typedef typename vfloat::Int vint;
- const vint iu = asInt(uv) & 0xffff;
- const vint iv = srl(asInt(uv),16);
- const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);
- const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);
- return Vec2<vfloat>(u,v);
- }
-
- __forceinline unsigned int geomID() const {
- return _geomID;
- }
-
- __forceinline unsigned int primID() const {
- return _primID;
- }
-
- public:
- BVH4::NodeRef troot;
-#if !defined(__X86_64__) && !defined(__aarch64__)
- unsigned align1;
-#endif
- unsigned time_steps;
- unsigned width;
-
- unsigned height;
- unsigned dim_offset;
- unsigned _geomID;
- unsigned _primID;
-
- unsigned align2;
- unsigned gridOffset;
- unsigned gridBytes;
- unsigned rootOffset;
-
- int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h
deleted file mode 100644
index 2ed922a5ae..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "../common/ray.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- class GridSOAIntersector1
- {
- public:
- typedef void Primitive;
-
- class Precalculations
- {
- public:
- __forceinline Precalculations (const Ray& ray, const void* ptr)
- : grid(nullptr) {}
-
- public:
- GridSOA* grid;
- int itime;
- float ftime;
- };
-
- template<typename Loader>
- static __forceinline void intersect(RayHit& ray,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(Ray& ray,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> v0, v1, v2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
-
- class GridSOAMBIntersector1
- {
- public:
- typedef void Primitive;
- typedef GridSOAIntersector1::Precalculations Precalculations;
-
- template<typename Loader>
- static __forceinline void intersect(RayHit& ray, const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(Ray& ray, const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x, line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h
deleted file mode 100644
index 41d66e1e28..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h
+++ /dev/null
@@ -1,445 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "../common/ray.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int K>
- struct MapUV0
- {
- const float* const grid_uv;
- size_t ofs00, ofs01, ofs10, ofs11;
-
- __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
- : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
-
- __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
- const vfloat<K> uv00(grid_uv[ofs00]);
- const vfloat<K> uv01(grid_uv[ofs01]);
- const vfloat<K> uv10(grid_uv[ofs10]);
- const vfloat<K> uv11(grid_uv[ofs11]);
- const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);
- const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
- const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);
- const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
- u = uv[0]; v = uv[1];
- }
- };
-
- template<int K>
- struct MapUV1
- {
- const float* const grid_uv;
- size_t ofs00, ofs01, ofs10, ofs11;
-
- __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
- : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
-
- __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
- const vfloat<K> uv00(grid_uv[ofs00]);
- const vfloat<K> uv01(grid_uv[ofs01]);
- const vfloat<K> uv10(grid_uv[ofs10]);
- const vfloat<K> uv11(grid_uv[ofs11]);
- const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);
- const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
- const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);
- const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
- u = uv[0]; v = uv[1];
- }
- };
-
- template<int K>
- class GridSOAIntersectorK
- {
- public:
- typedef void Primitive;
-
- class Precalculations
- {
-#if defined(__AVX__)
- static const int M = 8;
-#else
- static const int M = 4;
-#endif
-
- public:
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)
- : grid(nullptr), intersector(valid,ray) {}
-
- public:
- GridSOA* grid;
- PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
- };
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- const size_t ofs00 = (y+0)*line_offset+(x+0);
- const size_t ofs01 = (y+0)*line_offset+(x+1);
- const size_t ofs10 = (y+1)*line_offset+(x+0);
- const size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
-
- pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
- }
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- vbool<K> valid = valid_i;
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- const size_t ofs00 = (y+0)*line_offset+(x+0);
- const size_t ofs01 = (y+0)*line_offset+(x+1);
- const size_t ofs10 = (y+1)*line_offset+(x+0);
- const size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
-
- pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- }
- }
- return !valid;
- }
-
- template<typename Loader>
- static __forceinline void intersect(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
-
- template<int K>
- class GridSOAMBIntersectorK
- {
- public:
- typedef void Primitive;
- typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- vfloat<K> vftime;
- vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
-
- vbool<K> valid1 = valid_i;
- while (any(valid1)) {
- const size_t j = bsf(movemask(valid1));
- const int itime = vitime[j];
- const vbool<K> valid2 = valid1 & (itime == vitime);
- valid1 = valid1 & !valid2;
- intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
- }
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- size_t ofs00 = (y+0)*line_offset+(x+0);
- size_t ofs01 = (y+0)*line_offset+(x+1);
- size_t ofs10 = (y+1)*line_offset+(x+0);
- size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- ofs00 += grid_offset;
- ofs01 += grid_offset;
- ofs10 += grid_offset;
- ofs11 += grid_offset;
- const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- const Vec3vf<K> p00 = lerp(a00,b00,ftime);
- const Vec3vf<K> p01 = lerp(a01,b01,ftime);
- const Vec3vf<K> p10 = lerp(a10,b10,ftime);
- const Vec3vf<K> p11 = lerp(a11,b11,ftime);
-
- pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
- }
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- vfloat<K> vftime;
- vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
-
- vbool<K> valid_o = valid_i;
- vbool<K> valid1 = valid_i;
- while (any(valid1)) {
- const int j = int(bsf(movemask(valid1)));
- const int itime = vitime[j];
- const vbool<K> valid2 = valid1 & (itime == vitime);
- valid1 = valid1 & !valid2;
- valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
- }
- return !valid_o;
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- vbool<K> valid = valid_i;
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- size_t ofs00 = (y+0)*line_offset+(x+0);
- size_t ofs01 = (y+0)*line_offset+(x+1);
- size_t ofs10 = (y+1)*line_offset+(x+0);
- size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- ofs00 += grid_offset;
- ofs01 += grid_offset;
- ofs10 += grid_offset;
- ofs11 += grid_offset;
- const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- const Vec3vf<K> p00 = lerp(a00,b00,ftime);
- const Vec3vf<K> p01 = lerp(a01,b01,ftime);
- const Vec3vf<K> p10 = lerp(a10,b10,ftime);
- const Vec3vf<K> p11 = lerp(a11,b11,ftime);
-
- pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- }
- }
- return valid;
- }
-
- template<typename Loader>
- static __forceinline void intersect(RayHitK<K>& ray, size_t k,
- const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(RayK<K>& ray, size_t k,
- const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- float ftime;
- int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
-
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- float ftime;
- int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
-
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance.h b/thirdparty/embree-aarch64/kernels/geometry/instance.h
deleted file mode 100644
index 66893d581f..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/instance.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene_instance.h"
-
-namespace embree
-{
- struct InstancePrimitive
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return 1; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return N; }
-
- public:
-
- InstancePrimitive (const Instance* instance, unsigned int instID)
- : instance(instance)
- , instID_(instID)
- {}
-
- __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
- {
- assert(end-i == 1);
- const PrimRef& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance, geomID);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
- {
- assert(end-i == 1);
- const PrimRef& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance,geomID);
- return instance->linearBounds(0,itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
- {
- assert(end-i == 1);
- const PrimRefMB& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance,geomID);
- return instance->linearBounds(0,time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(Instance* instance) {
- return instance->bounds(0);
- }
-
- public:
- const Instance* instance;
- const unsigned int instID_ = std::numeric_limits<unsigned int>::max ();
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h
deleted file mode 100644
index 91731a39c5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "instance.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-
-namespace embree
-{
- namespace isa
- {
- struct InstanceIntersector1
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim);
- static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim);
- static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
- };
-
- struct InstanceIntersector1MB
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim);
- static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim);
- static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
- };
-
- template<int K>
- struct InstanceIntersectorK
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim);
- static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim);
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
-
- template<int K>
- struct InstanceIntersectorKMB
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim);
- static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim);
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h
deleted file mode 100644
index 0df49dd6e9..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h
+++ /dev/null
@@ -1,1074 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/context.h"
-#include "filter.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct UVIdentity {
- __forceinline void operator() (vfloat<M>& u, vfloat<M>& v) const {}
- };
-
-
- template<bool filter>
- struct Intersect1Epilog1
- {
- RayHit& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1Epilog1(RayHit& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar;
- ray.tfar = hit.t;
- bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- return found;
- }
- }
-#endif
-
- /* update hit information */
- ray.tfar = hit.t;
- ray.Ng = hit.Ng;
- ray.u = hit.u;
- ray.v = hit.v;
- ray.primID = primID;
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
- }
- };
-
- template<bool filter>
- struct Occluded1Epilog1
- {
- Ray& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1Epilog1(Ray& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) {
- HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar;
- ray.tfar = hit.t;
- const bool found = runOcclusionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- return found;
- }
- }
-#endif
- return true;
- }
- };
-
- template<int K, bool filter>
- struct Intersect1KEpilog1
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1KEpilog1(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t;
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- return found;
- }
- }
-#endif
-
- /* update hit information */
- ray.tfar[k] = hit.t;
- ray.Ng.x[k] = hit.Ng.x;
- ray.Ng.y[k] = hit.Ng.y;
- ray.Ng.z[k] = hit.Ng.z;
- ray.u[k] = hit.u;
- ray.v[k] = hit.v;
- ray.primID[k] = primID;
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
- return true;
- }
- };
-
- template<int K, bool filter>
- struct Occluded1KEpilog1
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1KEpilog1(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) {
- hit.finalize();
- HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t;
- const bool found = any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- return found;
- }
- }
-#endif
- return true;
- }
- };
-
- template<int M, int Mx, bool filter>
- struct Intersect1EpilogM
- {
- RayHit& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1EpilogM(RayHit& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
-
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- /* update hit information */
- const Vec2f uv = hit.uv(i);
- ray.tfar = hit.vt[i];
- ray.Ng.x = hit.vNg.x[i];
- ray.Ng.y = hit.vNg.y[i];
- ray.Ng.z = hit.vNg.z[i];
- ray.u = uv.x;
- ray.v = uv.y;
- ray.primID = primIDs[i];
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
-
- }
- };
-
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- template<int M, bool filter>
- struct Intersect1EpilogM<M,16,filter>
- {
- static const size_t Mx = 16;
- RayHit& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1EpilogM(RayHit& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* MAYBE_UNUSED scene = context->scene;
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
-
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- vbool<Mx> finalMask(((unsigned int)1 << i));
- ray.update(finalMask,hit.vt,hit.vu,hit.vv,hit.vNg.x,hit.vNg.y,hit.vNg.z,geomID,primIDs);
- instance_id_stack::foreach([&](unsigned level)
- {
- ray.instID[level] = context->user->instID[level];
- return (context->user->instID[level] != RTC_INVALID_GEOMETRY_ID);
- });
- return true;
-
- }
- };
-#endif
-
- template<int M, int Mx, bool filter>
- struct Occluded1EpilogM
- {
- Ray& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Occluded1EpilogM(Ray& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- if (unlikely(filter))
- hit.finalize(); /* called only once */
-
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- size_t m=movemask(valid);
- goto entry;
- while (true)
- {
- if (unlikely(m == 0)) return false;
- entry:
- size_t i=bsf(m);
-
- const unsigned int geomID = geomIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- m=btc(m,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* if we have no filter then the test passed */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- if (runOcclusionFilter1(geometry,ray,context,h)) return true;
- ray.tfar = old_t;
- m=btc(m,i);
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- return true;
- }
- };
-
- template<int M, bool filter>
- struct Intersect1EpilogMU
- {
- RayHit& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1EpilogMU(RayHit& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
-
- vbool<M> valid = valid_i;
- hit.finalize();
-
- size_t i = select_min(valid,hit.vt);
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter()))
- {
- bool foundhit = false;
- while (true)
- {
- /* call intersection filter function */
- Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- if (unlikely(none(valid))) break;
- i = select_min(valid,hit.vt);
- }
- return foundhit;
- }
-#endif
-
- /* update hit information */
- const Vec2f uv = hit.uv(i);
- const Vec3fa Ng = hit.Ng(i);
- ray.tfar = hit.t(i);
- ray.Ng.x = Ng.x;
- ray.Ng.y = Ng.y;
- ray.Ng.z = Ng.z;
- ray.u = uv.x;
- ray.v = uv.y;
- ray.primID = primID;
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
- }
- };
-
- template<int M, bool filter>
- struct Occluded1EpilogMU
- {
- Ray& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1EpilogMU(Ray& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid, Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- hit.finalize();
- for (size_t m=movemask(valid), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- if (runOcclusionFilter1(geometry,ray,context,h)) return true;
- ray.tfar = old_t;
- }
- return false;
- }
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct IntersectKEpilogM
- {
- RayHitK<K>& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
- const size_t i;
-
- __forceinline IntersectKEpilogM(RayHitK<K>& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs,
- size_t i)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
-
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- vbool<K> valid = valid_i;
-
- std::tie(u,v,t,Ng) = hit();
-
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
- /* ray masking test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* occlusion filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(m_accept,ray.tfar,old_t);
- return m_accept;
- }
- }
-#endif
-
- /* update hit information */
- vfloat<K>::store(valid,&ray.tfar,t);
- vfloat<K>::store(valid,&ray.Ng.x,Ng.x);
- vfloat<K>::store(valid,&ray.Ng.y,Ng.y);
- vfloat<K>::store(valid,&ray.Ng.z,Ng.z);
- vfloat<K>::store(valid,&ray.u,u);
- vfloat<K>::store(valid,&ray.v,v);
- vuint<K>::store(valid,&ray.primID,primID);
- vuint<K>::store(valid,&ray.geomID,geomID);
- instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid);
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct OccludedKEpilogM
- {
- vbool<K>& valid0;
- RayK<K>& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
- const size_t i;
-
- __forceinline OccludedKEpilogM(vbool<K>& valid0,
- RayK<K>& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs,
- size_t i)
- : valid0(valid0), ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- vbool<K> valid = valid_i;
-
- /* ray masking test */
- Scene* scene MAYBE_UNUSED = context->scene;
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return valid;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- valid = runOcclusionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(valid,ray.tfar,old_t);
- }
- }
-#endif
-
- /* update occlusion */
- valid0 = valid0 & !valid;
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct IntersectKEpilogMU
- {
- RayHitK<K>& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline IntersectKEpilogMU(RayHitK<K>& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_org, const Hit& hit) const
- {
- vbool<K> valid = valid_org;
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
-
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
- /* ray masking test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(m_accept,ray.tfar,old_t);
- return m_accept;
- }
- }
-#endif
-
- /* update hit information */
- vfloat<K>::store(valid,&ray.tfar,t);
- vfloat<K>::store(valid,&ray.Ng.x,Ng.x);
- vfloat<K>::store(valid,&ray.Ng.y,Ng.y);
- vfloat<K>::store(valid,&ray.Ng.z,Ng.z);
- vfloat<K>::store(valid,&ray.u,u);
- vfloat<K>::store(valid,&ray.v,v);
- vuint<K>::store(valid,&ray.primID,primID);
- vuint<K>::store(valid,&ray.geomID,geomID);
- instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid);
-
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct OccludedKEpilogMU
- {
- vbool<K>& valid0;
- RayK<K>& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline OccludedKEpilogMU(vbool<K>& valid0,
- RayK<K>& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : valid0(valid0), ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- vbool<K> valid = valid_i;
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* occlusion filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- valid = runOcclusionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(valid,ray.tfar,old_t);
- }
- }
-#endif
-
- /* update occlusion */
- valid0 = valid0 & !valid;
- return valid;
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct Intersect1KEpilogM
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1KEpilogM(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- vbool<Mx> valid = valid_i;
- hit.finalize();
- if (Mx > M) valid &= (1<<M)-1;
- size_t i = select_min(valid,hit.vt);
- assert(i<M);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
- assert(i<M);
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask[k]) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- assert(i<M);
- const Vec2f uv = hit.uv(i);
- HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- foundhit = foundhit | found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
- assert(i<M);
- /* update hit information */
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<Mx>(hit.vNg.x),vfloat<Mx>(hit.vNg.y),vfloat<Mx>(hit.vNg.z),geomID,vuint<Mx>(primIDs));
-#else
- const Vec2f uv = hit.uv(i);
- ray.tfar[k] = hit.t(i);
- ray.Ng.x[k] = hit.vNg.x[i];
- ray.Ng.y[k] = hit.vNg.y[i];
- ray.Ng.z[k] = hit.vNg.z[i];
- ray.u[k] = uv.x;
- ray.v[k] = uv.y;
- ray.primID[k] = primIDs[i];
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
-#endif
- return true;
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct Occluded1KEpilogM
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Occluded1KEpilogM(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- if (unlikely(filter))
- hit.finalize(); /* called only once */
-
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- size_t m=movemask(valid);
- goto entry;
- while (true)
- {
- if (unlikely(m == 0)) return false;
- entry:
- size_t i=bsf(m);
-
- const unsigned int geomID = geomIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask[k]) == 0) {
- m=btc(m,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* execute occlusion filer */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true;
- ray.tfar[k] = old_t;
- m=btc(m,i);
- continue;
- }
- }
-#endif
- break;
- }
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct Intersect1KEpilogMU
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1KEpilogMU(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- /* ray mask test */
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* finalize hit calculation */
- vbool<M> valid = valid_i;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter()))
- {
- bool foundhit = false;
- while (true)
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- foundhit = foundhit | found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value
- if (unlikely(none(valid))) break;
- i = select_min(valid,hit.vt);
- }
- return foundhit;
- }
- }
-#endif
-
- /* update hit information */
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- const Vec3fa Ng = hit.Ng(i);
- ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<M>(Ng.x),vfloat<M>(Ng.y),vfloat<M>(Ng.z),geomID,vuint<M>(primID));
-#else
- const Vec2f uv = hit.uv(i);
- const Vec3fa Ng = hit.Ng(i);
- ray.tfar[k] = hit.t(i);
- ray.Ng.x[k] = Ng.x;
- ray.Ng.y[k] = Ng.y;
- ray.Ng.z[k] = Ng.z;
- ray.u[k] = uv.x;
- ray.v[k] = uv.y;
- ray.primID[k] = primID;
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct Occluded1KEpilogMU
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1KEpilogMU(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- /* ray mask test */
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- hit.finalize();
- for (size_t m=movemask(valid_i), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true;
- ray.tfar[k] = old_t;
- }
- return false;
- }
- }
-#endif
- return true;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h
deleted file mode 100644
index 5c1ba5cb61..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-#include "../bvh/node_intersector1.h"
-#include "../bvh/node_intersector_packet.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename Intersector>
- struct ArrayIntersector1
- {
- typedef typename Intersector::Primitive Primitive;
- typedef typename Intersector::Precalculations Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++)
- Intersector::intersect(pre,ray,context,prim[i]);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- if (Intersector::occluded(pre,ray,context,prim[i]))
- return true;
- }
- return false;
- }
-
- template<int N>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0; i<num; i++)
- changed |= Intersector::pointQuery(query, context, prim[i]);
- return changed;
- }
-
- template<int K>
- static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- }
-
- template<int K>
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- return valid;
- }
- };
-
- template<int K, typename Intersector>
- struct ArrayIntersectorK_1
- {
- typedef typename Intersector::Primitive Primitive;
- typedef typename Intersector::Precalculations Precalculations;
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- Intersector::intersect(valid,pre,ray,context,prim[i]);
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- vbool<K> valid0 = valid;
- for (size_t i=0; i<num; i++) {
- valid0 &= !Intersector::occluded(valid0,pre,ray,context,prim[i]);
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- Intersector::intersect(pre,ray,k,context,prim[i]);
- }
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- if (Intersector::occluded(pre,ray,k,context,prim[i]))
- return true;
- }
- return false;
- }
- };
-
- // =============================================================================================
-
- template<int K, typename IntersectorK>
- struct ArrayIntersectorKStream
- {
- typedef typename IntersectorK::Primitive PrimitiveK;
- typedef typename IntersectorK::Precalculations PrecalculationsK;
-
- static __forceinline void intersectK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(valid,ray); // FIXME: might cause trouble
-
- for (size_t i=0; i<num; i++) {
- IntersectorK::intersect(valid,pre,ray,context,prim[i]);
- }
- }
-
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(valid,ray); // FIXME: might cause trouble
- vbool<K> valid0 = valid;
- for (size_t i=0; i<num; i++) {
- valid0 &= !IntersectorK::occluded(valid0,pre,ray,context,prim[i]);
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(const Accel::Intersectors* This, RayHitK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- for (size_t i=0; i<num; i++) {
- IntersectorK::intersect(pre,ray,k,context,prim[i]);
- }
- }
-
- static __forceinline bool occluded(const Accel::Intersectors* This, RayK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- for (size_t i=0; i<num; i++) {
- if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
- return true;
- }
- return false;
- }
-
- static __forceinline size_t occluded(const Accel::Intersectors* This, size_t cur_mask, RayK<K>** __restrict__ inputPackets, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- size_t m_occluded = 0;
- for (size_t i=0; i<num; i++) {
- size_t bits = cur_mask & (~m_occluded);
- for (; bits!=0; )
- {
- const size_t rayID = bscf(bits);
- RayHitK<K> &ray = *inputPackets[rayID / K];
- const size_t k = rayID % K;
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
- {
- m_occluded |= (size_t)1 << rayID;
- ray.tfar[k] = neg_inf;
- }
- }
- }
- return m_occluded;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h
deleted file mode 100644
index eef5b0b1fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct LineIntersectorHitM
- {
- __forceinline LineIntersectorHitM() {}
-
- __forceinline LineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng) {}
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct FlatLinearCurveIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Epilog& epilog)
- {
- /* transform end points into ray space */
- vbool<M> valid = valid_i;
- vfloat<M> depth_scale = pre.depth_scale;
- LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space;
-
- const Vec3vf<M> ray_org ((Vec3fa)ray.org);
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
-
- Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
- Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);
-
- /* approximative intersection with cone */
- const Vec4vf<M> v = p1-p0;
- const Vec4vf<M> w = -p0;
- const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y);
- const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y);
- const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one));
- const Vec4vf<M> p = madd(u,v,p0);
- const vfloat<M> t = p.z;
- const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y);
- const vfloat<M> r = p.w;
- const vfloat<M> r2 = r*r;
- valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections
- if (unlikely(none(valid))) return false;
-
- /* ignore denormalized segments */
- const Vec3vf<M> T = v1.xyz()-v0.xyz();
- valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero));
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- LineIntersectorHitM<M> hit(u,zero,t,T);
- return epilog(valid,hit);
- }
- };
-
- template<int M, int K>
- struct FlatLinearCurveIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Epilog& epilog)
- {
- /* transform end points into ray space */
- vbool<M> valid = valid_i;
- vfloat<M> depth_scale = pre.depth_scale[k];
- LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space[k];
- const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
-
- Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
- Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);
-
- /* approximative intersection with cone */
- const Vec4vf<M> v = p1-p0;
- const Vec4vf<M> w = -p0;
- const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y);
- const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y);
- const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one));
- const Vec4vf<M> p = madd(u,v,p0);
- const vfloat<M> t = p.z;
- const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y);
- const vfloat<M> r = p.w;
- const vfloat<M> r2 = r*r;
- valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections
- if (unlikely(none(valid))) return false;
-
- /* ignore denormalized segments */
- const Vec3vf<M> T = v1.xyz()-v0.xyz();
- valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero));
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- LineIntersectorHitM<M> hit(u,zero,t,T);
- return epilog(valid,hit);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei.h b/thirdparty/embree-aarch64/kernels/geometry/linei.h
deleted file mode 100644
index a72029ca53..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/linei.h
+++ /dev/null
@@ -1,709 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- template<int M>
- struct LineMi
- {
- /* Virtual interface to query information about the line segment type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored line segments */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N line segments */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- /* Returns required number of bytes for N line segments */
- static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); }
-
- public:
-
- /* Default constructor */
- __forceinline LineMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype)
- : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs)
- {
- assert(all(vuint<M>(geomID()) == geomIDs));
- }
-
- /* Returns a mask that tells which line segments are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns a mask that tells which line segments are valid */
- template<int Mx>
- __forceinline vbool<Mx> valid() const { return vuint<Mx>(primIDs) != vuint<Mx>(-1); }
-
- /* Returns if the specified line segment is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored line segments */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- //template<class T>
- //static __forceinline T unmask(T &index) { return index & 0x3fffffff; }
-
- __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; }
- //__forceinline vuint<M> geomID() { return unmask(geomIDs); }
- //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); }
- //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* gather the line segments */
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom,
- float time) const;
-
- /* gather the line segments with lateral info */
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom,
- float time) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom,
- float time) const;
-
- /* Calculate the bounds of the line segments */
- __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID(i));
- const Vec3ff& p0 = geom->vertex(v0[i]+0,itime);
- const Vec3ff& p1 = geom->vertex(v0[i]+1,itime);
- BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
- b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
- bounds.extend(b);
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
- return LBBox3fa(bounds(scene,itime+0), bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID(i));
- allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID((unsigned int)i));
- allBounds.extend(geom->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill line segment from line segment list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
- vuint<M> geomID, primID;
- vuint<M> v0;
- unsigned short leftExists = 0;
- unsigned short rightExists = 0;
- const PrimRefT* prim = &prims[begin];
-
- for (size_t i=0; i<M; i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(prim->geomID());
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
- v0[i] = geom->segment(prim->primID());
- leftExists |= geom->segmentLeftExists(primID[i]) << i;
- rightExists |= geom->segmentRightExists(primID[i]) << i;
- begin++;
- } else {
- assert(i);
- if (i>0) {
- geomID[i] = geomID[i-1];
- primID[i] = -1;
- v0[i] = v0[i-1];
- }
- }
- if (begin<end) prim = &prims[begin]; // FIXME: remove this line
- }
- new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = LineMi::blocks(set.size());
- size_t numbytes = LineMi::bytes(set.size());
- LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel,items);
- };
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims,begin,end,scene);
- return linearBounds(scene,itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims,begin,end,scene);
- return linearBounds(scene,time_range);
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.begin();
- size_t end = prims.end();
- size_t items = LineMi::blocks(prims.size());
- size_t numbytes = LineMi::bytes(prims.size());
- LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
- const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items);
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range);
- };
-
- /* Updates the primitive */
- __forceinline BBox3fa update(LineSegments* geom)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const Vec3ff& p0 = geom->vertex(v0[i]+0);
- const Vec3ff& p1 = geom->vertex(v0[i]+1);
- BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
- b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
- bounds.extend(b);
- }
- return bounds;
- }
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) {
- return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}";
- }
-
- public:
- unsigned char gtype;
- unsigned char m;
- unsigned int sharedGeomID;
- unsigned short leftExists, rightExists;
- vuint<M> v0; // index of start vertex
- private:
- vuint<M> primIDs; // primitive ID
- };
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf4 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom) const
- {
- gather(p0,p1,geom);
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom,
- const int itime) const
- {
- gatheri(p0,p1,geom,itime);
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf4 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1,aL,aR;
- gatheri(a0,a1,aL,aR,geom,itime);
- Vec4vf4 b0,b1,bL,bR;
- gatheri(b0,b1,bL,bR,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- pL = lerp(aL,bL,vfloat4(ftime));
- pR = lerp(aR,bR,vfloat4(ftime));
- }
-
-#if defined(__AVX__)
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf8 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
- const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf);
- const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf);
- const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf);
- const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
- const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf);
- const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf);
- const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf);
- const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
- const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf);
- const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf);
- const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf);
- const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
- const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf);
- const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf);
- const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf);
- const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1,aL,aR;
- gatheri(a0,a1,aL,aR,geom,itime);
- Vec4vf8 b0,b1,bL,bR;
- gatheri(b0,b1,bL,bR,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- pL = lerp(aL,bL,vfloat8(ftime));
- pR = lerp(aR,bR,vfloat8(ftime));
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom) const
- {
- gather(p0,p1,geom);
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom,
- const int itime) const
- {
- gatheri(p0,p1,geom,itime);
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf8 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
-#endif
-
- template<int M>
- typename LineMi<M>::Type LineMi<M>::type;
-
- typedef LineMi<4> Line4i;
- typedef LineMi<8> Line8i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h
deleted file mode 100644
index a431796a88..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "linei.h"
-#include "line_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct FlatLinearCurveMiIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct FlatLinearCurveMiMBIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct FlatLinearCurveMiIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct FlatLinearCurveMiMBIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/object.h b/thirdparty/embree-aarch64/kernels/geometry/object.h
deleted file mode 100644
index f26391de52..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/object.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- struct Object
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return 1; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return N; }
-
- public:
-
- /*! constructs a virtual object */
- Object (unsigned geomID, unsigned primID)
- : _geomID(geomID), _primID(primID) {}
-
- __forceinline unsigned geomID() const {
- return _geomID;
- }
-
- __forceinline unsigned primID() const {
- return _primID;
- }
-
- /*! fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
- {
- const PrimRef& prim = prims[i]; i++;
- new (this) Object(prim.geomID(), prim.primID());
- }
-
- /*! fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
- {
- const PrimRef& prim = prims[i]; i++;
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- new (this) Object(geomID, primID);
- AccelSet* accel = (AccelSet*) scene->get(geomID);
- return accel->linearBounds(primID,itime);
- }
-
- /*! fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
- {
- const PrimRefMB& prim = prims[i]; i++;
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- new (this) Object(geomID, primID);
- AccelSet* accel = (AccelSet*) scene->get(geomID);
- return accel->linearBounds(primID,time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(AccelSet* mesh) {
- return mesh->bounds(primID());
- }
-
- private:
- unsigned int _geomID; //!< geometry ID
- unsigned int _primID; //!< primitive ID
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h
deleted file mode 100644
index 97882e0e59..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "object.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- template<bool mblur>
- struct ObjectIntersector1
- {
- typedef Object Primitive;
-
- static const bool validIntersectorK = false;
-
- struct Precalculations {
- __forceinline Precalculations() {}
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- if ((ray.mask & accel->mask) == 0)
- return;
-#endif
-
- accel->intersect(ray,prim.geomID(),prim.primID(),context,reportIntersection1);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- if ((ray.mask & accel->mask) == 0)
- return false;
-#endif
-
- accel->occluded(ray,prim.geomID(),prim.primID(),context,&reportOcclusion1);
- return ray.tfar < 0.0f;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID());
- context->geomID = prim.geomID();
- context->primID = prim.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int K>
- static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(false);
- }
-
- template<int K>
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(false);
- return valid;
- }
- };
-
- template<int K, bool mblur>
- struct ObjectIntersectorK
- {
- typedef Object Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static __forceinline void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim)
- {
- vbool<K> valid = valid_i;
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (ray.mask & accel->mask) != 0;
- if (none(valid)) return;
-#endif
- accel->intersect(valid,ray,prim.geomID(),prim.primID(),context,&reportIntersection1);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim)
- {
- vbool<K> valid = valid_i;
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (ray.mask & accel->mask) != 0;
- if (none(valid)) return false;
-#endif
- accel->occluded(valid,ray,prim.geomID(),prim.primID(),context,&reportOcclusion1);
- return ray.tfar < 0.0f;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
-
- typedef ObjectIntersectorK<4,false> ObjectIntersector4;
- typedef ObjectIntersectorK<8,false> ObjectIntersector8;
- typedef ObjectIntersectorK<16,false> ObjectIntersector16;
-
- typedef ObjectIntersectorK<4,true> ObjectIntersector4MB;
- typedef ObjectIntersectorK<8,true> ObjectIntersector8MB;
- typedef ObjectIntersectorK<16,true> ObjectIntersector16MB;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/plane.h b/thirdparty/embree-aarch64/kernels/geometry/plane.h
deleted file mode 100644
index ebe45db558..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/plane.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- struct HalfPlane
- {
- const Vec3fa P; //!< plane origin
- const Vec3fa N; //!< plane normal
-
- __forceinline HalfPlane(const Vec3fa& P, const Vec3fa& N)
- : P(P), N(N) {}
-
- __forceinline BBox1f intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
- {
- Vec3fa O = Vec3fa(ray_org) - P;
- Vec3fa D = Vec3fa(ray_dir);
- float ON = dot(O,N);
- float DN = dot(D,N);
- bool eps = abs(DN) < min_rcp_input;
- float t = -ON*rcp(DN);
- float lower = select(eps || DN < 0.0f, float(neg_inf), t);
- float upper = select(eps || DN > 0.0f, float(pos_inf), t);
- return BBox1f(lower,upper);
- }
- };
-
- template<int M>
- struct HalfPlaneN
- {
- const Vec3vf<M> P; //!< plane origin
- const Vec3vf<M> N; //!< plane normal
-
- __forceinline HalfPlaneN(const Vec3vf<M>& P, const Vec3vf<M>& N)
- : P(P), N(N) {}
-
- __forceinline BBox<vfloat<M>> intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
- {
- Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray_org) - P;
- Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray_dir);
- vfloat<M> ON = dot(O,N);
- vfloat<M> DN = dot(D,N);
- vbool<M> eps = abs(DN) < min_rcp_input;
- vfloat<M> t = -ON*rcp(DN);
- vfloat<M> lower = select(eps | DN < 0.0f, vfloat<M>(neg_inf), t);
- vfloat<M> upper = select(eps | DN > 0.0f, vfloat<M>(pos_inf), t);
- return BBox<vfloat<M>>(lower,upper);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/pointi.h b/thirdparty/embree-aarch64/kernels/geometry/pointi.h
deleted file mode 100644
index 4ba298e86b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/pointi.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- template<int M>
- struct PointMi
- {
- /* Virtual interface to query information about the line segment type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored line segments */
- static __forceinline size_t max_size()
- {
- return M;
- }
-
- /* Returns required number of primitive blocks for N line segments */
- static __forceinline size_t blocks(size_t N)
- {
- return (N + max_size() - 1) / max_size();
- }
-
- /* Returns required number of bytes for N line segments */
- static __forceinline size_t bytes(size_t N)
- {
- return blocks(N) * sizeof(PointMi);
- }
-
- public:
- /* Default constructor */
- __forceinline PointMi() {}
-
- /* Construction from vertices and IDs */
- __forceinline PointMi(const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype, uint32_t numPrimitives)
- : gtype((unsigned char)gtype),
- numPrimitives(numPrimitives),
- sharedGeomID(geomIDs[0]),
- primIDs(primIDs)
- {
- assert(all(vuint<M>(geomID()) == geomIDs));
- }
-
- /* Returns a mask that tells which line segments are valid */
- __forceinline vbool<M> valid() const {
- return vint<M>(step) < vint<M>(numPrimitives);
- }
-
- /* Returns a mask that tells which line segments are valid */
- template<int Mx> __forceinline vbool<Mx> valid() const {
- return vint<Mx>(step) < vint<Mx>(numPrimitives);
- }
-
- /* Returns if the specified line segment is valid */
- __forceinline bool valid(const size_t i) const
- {
- assert(i < M);
- return i < numPrimitives;
- }
-
- /* Returns the number of stored line segments */
- __forceinline size_t size() const {
- return numPrimitives;
- }
-
- __forceinline unsigned int geomID(unsigned int i = 0) const {
- return sharedGeomID;
- }
-
- __forceinline vuint<M>& primID() {
- return primIDs;
- }
- __forceinline const vuint<M>& primID() const {
- return primIDs;
- }
- __forceinline unsigned int primID(const size_t i) const {
- assert(i < M);
- return primIDs[i];
- }
-
- /* gather the line segments */
- __forceinline void gather(Vec4vf<M>& p0, const Points* geom) const;
- __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0, const Points* geom, const int itime) const;
- __forceinline void gatheri(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0, const Points* geom, float time) const;
- __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, float time) const;
-
- /* Calculate the bounds of the line segments */
- __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
- {
- BBox3fa bounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID(i));
- bounds.extend(geom->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
- return LBBox3fa(bounds(scene, itime + 0), bounds(scene, itime + 1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene* const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID(i));
- allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene* const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID((unsigned int)i));
- allBounds.extend(geom->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill line segment from line segment list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
- vuint<M> geomID, primID;
- vuint<M> v0;
- const PrimRefT* prim = &prims[begin];
-
- int numPrimitives = 0;
- for (size_t i = 0; i < M; i++) {
- if (begin < end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
- begin++;
- numPrimitives++;
- } else {
- assert(i);
- if (i > 0) {
- geomID[i] = geomID[i - 1];
- primID[i] = primID[i - 1];
- }
- }
- if (begin < end)
- prim = &prims[begin]; // FIXME: remove this line
- }
- new (this) PointMi(geomID, primID, gty, numPrimitives); // FIXME: use non temporal store
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf(BVH* bvh,
- const PrimRef* prims,
- const range<size_t>& set,
- const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = PointMi::blocks(set.size());
- size_t numbytes = PointMi::bytes(set.size());
- PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
- for (size_t i = 0; i < items; i++) {
- accel[i].fill(prims, start, set.end(), bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel, items);
- };
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(
- const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.object_range.begin();
- size_t end = prims.object_range.end();
- size_t items = PointMi::blocks(prims.object_range.size());
- size_t numbytes = PointMi::bytes(prims.object_range.size());
- PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
- const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel, items);
-
- LBBox3fa bounds = empty;
- for (size_t i = 0; i < items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(), start, end, bvh->scene, prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node, bounds, prims.time_range);
- };
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PointMi& line)
- {
- return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}";
- }
-
- public:
- unsigned char gtype;
- unsigned char numPrimitives;
- unsigned int sharedGeomID;
-
- private:
- vuint<M> primIDs; // primitive ID
- };
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0)));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1)));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2)));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3)));
- transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime));
- transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0; gatheri(a0, geom, itime);
- Vec4vf4 b0; gatheri(b0, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat4(ftime));
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0, b0;
- Vec3vf4 norm0, norm1;
- gatheri(a0, norm0, geom, itime);
- gatheri(b0, norm1, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat4(ftime));
- n0 = lerp(norm0, norm1, vfloat4(ftime));
- }
-
-#if defined(__AVX__)
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4)));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5)));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6)));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7)));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4)));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5)));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6)));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7)));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0)));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1)));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2)));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3)));
- const vfloat4 b4 = vfloat4(geom->normal(primID(4)));
- const vfloat4 b5 = vfloat4(geom->normal(primID(5)));
- const vfloat4 b6 = vfloat4(geom->normal(primID(6)));
- const vfloat4 b7 = vfloat4(geom->normal(primID(7)));
- transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime));
- const vfloat4 b4 = vfloat4(geom->normal(primID(4), itime));
- const vfloat4 b5 = vfloat4(geom->normal(primID(5), itime));
- const vfloat4 b6 = vfloat4(geom->normal(primID(6), itime));
- const vfloat4 b7 = vfloat4(geom->normal(primID(7), itime));
- transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0;
- gatheri(a0, geom, itime);
- Vec4vf8 b0;
- gatheri(b0, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat8(ftime));
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0, b0;
- Vec3vf8 norm0, norm1;
- gatheri(a0, norm0, geom, itime);
- gatheri(b0, norm1, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat8(ftime));
- n0 = lerp(norm0, norm1, vfloat8(ftime));
- }
-#endif
-
- template<int M>
- typename PointMi<M>::Type PointMi<M>::type;
-
- typedef PointMi<4> Point4i;
- typedef PointMi<8> Point8i;
-
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive.h b/thirdparty/embree-aarch64/kernels/geometry/primitive.h
deleted file mode 100644
index 41e5b2b304..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/primitive.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/scene.h"
-#include "../../common/simd/simd.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-
-namespace embree
-{
- struct PrimitiveType
- {
- /*! returns name of this primitive type */
- virtual const char* name() const = 0;
-
- /*! Returns the number of stored active primitives in a block. */
- virtual size_t sizeActive(const char* This) const = 0;
-
- /*! Returns the number of stored active and inactive primitives in a block. */
- virtual size_t sizeTotal(const char* This) const = 0;
-
- /*! Returns the number of bytes of block. */
- virtual size_t getBytes(const char* This) const = 0;
- };
-
- template<typename Primitive>
- struct PrimitivePointQuery1
- {
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
- {
- bool changed = false;
- for (size_t i = 0; i < Primitive::max_size(); i++)
- {
- if (!prim.valid(i)) break;
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID(i));
- context->geomID = prim.geomID(i);
- context->primID = prim.primID(i);
- changed |= accel->pointQuery(query, context);
- }
- return changed;
- }
-
- static __forceinline void pointQueryNoop(PointQuery* query, PointQueryContext* context, const Primitive& prim) { }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp b/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp
deleted file mode 100644
index f93574c9c8..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primitive.h"
-#include "curveNv.h"
-#include "curveNi.h"
-#include "curveNi_mb.h"
-#include "linei.h"
-#include "triangle.h"
-#include "trianglev.h"
-#include "trianglev_mb.h"
-#include "trianglei.h"
-#include "quadv.h"
-#include "quadi.h"
-#include "subdivpatch1.h"
-#include "object.h"
-#include "instance.h"
-#include "subgrid.h"
-
-namespace embree
-{
- /********************** Curve4v **************************/
-
- template<>
- const char* Curve4v::Type::name () const {
- return "curve4v";
- }
-
- template<>
- size_t Curve4v::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4v*)This)->N;
- }
-
- template<>
- size_t Curve4v::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4v*)This)->N;
- }
-
- template<>
- size_t Curve4v::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4v::bytes(sizeActive(This));
- }
-
- /********************** Curve4i **************************/
-
- template<>
- const char* Curve4i::Type::name () const {
- return "curve4i";
- }
-
- template<>
- size_t Curve4i::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4i*)This)->N;
- }
-
- template<>
- size_t Curve4i::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4i*)This)->N;
- }
-
- template<>
- size_t Curve4i::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4i::bytes(sizeActive(This));
- }
-
- /********************** Curve4iMB **************************/
-
- template<>
- const char* Curve4iMB::Type::name () const {
- return "curve4imb";
- }
-
- template<>
- size_t Curve4iMB::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4iMB*)This)->N;
- }
-
- template<>
- size_t Curve4iMB::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4iMB*)This)->N;
- }
-
- template<>
- size_t Curve4iMB::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4iMB::bytes(sizeActive(This));
- }
-
- /********************** Line4i **************************/
-
- template<>
- const char* Line4i::Type::name () const {
- return "line4i";
- }
-
- template<>
- size_t Line4i::Type::sizeActive(const char* This) const {
- return ((Line4i*)This)->size();
- }
-
- template<>
- size_t Line4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Line4i::Type::getBytes(const char* This) const {
- return sizeof(Line4i);
- }
-
- /********************** Triangle4 **************************/
-
- template<>
- const char* Triangle4::Type::name () const {
- return "triangle4";
- }
-
- template<>
- size_t Triangle4::Type::sizeActive(const char* This) const {
- return ((Triangle4*)This)->size();
- }
-
- template<>
- size_t Triangle4::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4::Type::getBytes(const char* This) const {
- return sizeof(Triangle4);
- }
-
- /********************** Triangle4v **************************/
-
- template<>
- const char* Triangle4v::Type::name () const {
- return "triangle4v";
- }
-
- template<>
- size_t Triangle4v::Type::sizeActive(const char* This) const {
- return ((Triangle4v*)This)->size();
- }
-
- template<>
- size_t Triangle4v::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4v::Type::getBytes(const char* This) const {
- return sizeof(Triangle4v);
- }
-
- /********************** Triangle4i **************************/
-
- template<>
- const char* Triangle4i::Type::name () const {
- return "triangle4i";
- }
-
- template<>
- size_t Triangle4i::Type::sizeActive(const char* This) const {
- return ((Triangle4i*)This)->size();
- }
-
- template<>
- size_t Triangle4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4i::Type::getBytes(const char* This) const {
- return sizeof(Triangle4i);
- }
-
- /********************** Triangle4vMB **************************/
-
- template<>
- const char* Triangle4vMB::Type::name () const {
- return "triangle4vmb";
- }
-
- template<>
- size_t Triangle4vMB::Type::sizeActive(const char* This) const {
- return ((Triangle4vMB*)This)->size();
- }
-
- template<>
- size_t Triangle4vMB::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4vMB::Type::getBytes(const char* This) const {
- return sizeof(Triangle4vMB);
- }
-
- /********************** Quad4v **************************/
-
- template<>
- const char* Quad4v::Type::name () const {
- return "quad4v";
- }
-
- template<>
- size_t Quad4v::Type::sizeActive(const char* This) const {
- return ((Quad4v*)This)->size();
- }
-
- template<>
- size_t Quad4v::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Quad4v::Type::getBytes(const char* This) const {
- return sizeof(Quad4v);
- }
-
- /********************** Quad4i **************************/
-
- template<>
- const char* Quad4i::Type::name () const {
- return "quad4i";
- }
-
- template<>
- size_t Quad4i::Type::sizeActive(const char* This) const {
- return ((Quad4i*)This)->size();
- }
-
- template<>
- size_t Quad4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Quad4i::Type::getBytes(const char* This) const {
- return sizeof(Quad4i);
- }
-
- /********************** SubdivPatch1 **************************/
-
- const char* SubdivPatch1::Type::name () const {
- return "subdivpatch1";
- }
-
- size_t SubdivPatch1::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t SubdivPatch1::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t SubdivPatch1::Type::getBytes(const char* This) const {
- return sizeof(SubdivPatch1);
- }
-
- SubdivPatch1::Type SubdivPatch1::type;
-
- /********************** Virtual Object **************************/
-
- const char* Object::Type::name () const {
- return "object";
- }
-
- size_t Object::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t Object::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t Object::Type::getBytes(const char* This) const {
- return sizeof(Object);
- }
-
- Object::Type Object::type;
-
- /********************** Instance **************************/
-
- const char* InstancePrimitive::Type::name () const {
- return "instance";
- }
-
- size_t InstancePrimitive::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t InstancePrimitive::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t InstancePrimitive::Type::getBytes(const char* This) const {
- return sizeof(InstancePrimitive);
- }
-
- InstancePrimitive::Type InstancePrimitive::type;
-
- /********************** SubGrid **************************/
-
- const char* SubGrid::Type::name () const {
- return "subgrid";
- }
-
- size_t SubGrid::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t SubGrid::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t SubGrid::Type::getBytes(const char* This) const {
- return sizeof(SubGrid);
- }
-
- SubGrid::Type SubGrid::type;
-
- /********************** SubGridQBVH4 **************************/
-
- template<>
- const char* SubGridQBVH4::Type::name () const {
- return "SubGridQBVH4";
- }
-
- template<>
- size_t SubGridQBVH4::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- template<>
- size_t SubGridQBVH4::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- template<>
- size_t SubGridQBVH4::Type::getBytes(const char* This) const {
- return sizeof(SubGridQBVH4);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h
deleted file mode 100644
index 57ff4e60e5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects a ray with a quad with backface culling
- * enabled. The quad v0,v1,v2,v3 is split into two triangles
- * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
- * triangles gets intersected. */
- template<int N>
- __forceinline vbool<N> intersect_quad_backface_culling(const vbool<N>& valid0,
- const Vec3fa& ray_org,
- const Vec3fa& ray_dir,
- const float ray_tnear,
- const float ray_tfar,
- const Vec3vf<N>& quad_v0,
- const Vec3vf<N>& quad_v1,
- const Vec3vf<N>& quad_v2,
- const Vec3vf<N>& quad_v3,
- vfloat<N>& u_o,
- vfloat<N>& v_o,
- vfloat<N>& t_o)
- {
- /* calculate vertices relative to ray origin */
- vbool<N> valid = valid0;
- const Vec3vf<N> O = Vec3vf<N>(ray_org);
- const Vec3vf<N> D = Vec3vf<N>(ray_dir);
- const Vec3vf<N> va = quad_v0-O;
- const Vec3vf<N> vb = quad_v1-O;
- const Vec3vf<N> vc = quad_v2-O;
- const Vec3vf<N> vd = quad_v3-O;
-
- const Vec3vf<N> edb = vb-vd;
- const vfloat<N> WW = dot(cross(vd,edb),D);
- const Vec3vf<N> v0 = select(WW <= 0.0f,va,vc);
- const Vec3vf<N> v1 = select(WW <= 0.0f,vb,vd);
- const Vec3vf<N> v2 = select(WW <= 0.0f,vd,vb);
-
- /* calculate edges */
- const Vec3vf<N> e0 = v2-v0;
- const Vec3vf<N> e1 = v0-v1;
-
- /* perform edge tests */
- const vfloat<N> U = dot(cross(v0,e0),D);
- const vfloat<N> V = dot(cross(v1,e1),D);
- valid &= max(U,V) <= 0.0f;
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<N> Ng = cross(e1,e0);
- const vfloat<N> den = dot(Ng,D);
- const vfloat<N> rcpDen = rcp(den);
-
- /* perform depth test */
- const vfloat<N> t = rcpDen*dot(v0,Ng);
- valid &= vfloat<N>(ray_tnear) <= t & t <= vfloat<N>(ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<N>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- t_o = t;
- u_o = U * rcpDen;
- v_o = V * rcpDen;
- u_o = select(WW <= 0.0f,u_o,1.0f-u_o);
- v_o = select(WW <= 0.0f,v_o,1.0f-v_o);
- return valid;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h
deleted file mode 100644
index 74e8c7720c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h
+++ /dev/null
@@ -1,566 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadv.h"
-#include "triangle_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct QuadHitM
- {
- __forceinline QuadHitM() {}
-
- __forceinline QuadHitM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& T,
- const vfloat<M>& absDen,
- const Vec3vf<M>& Ng,
- const vbool<M>& flags)
- : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {}
-
- __forceinline void finalize()
- {
- const vfloat<M> rcpAbsDen = rcp(absDen);
- vt = T * rcpAbsDen;
- const vfloat<M> u = min(U * rcpAbsDen,1.0f);
- const vfloat<M> v = min(V * rcpAbsDen,1.0f);
- const vfloat<M> u1 = vfloat<M>(1.0f) - u;
- const vfloat<M> v1 = vfloat<M>(1.0f) - v;
-#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
- vu = select(flags,u1,u);
- vv = select(flags,v1,v);
- vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-#else
- const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
- vv = select(flags,u1,v);
- vu = select(flags,v1,u);
- vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
-#endif
- }
-
- __forceinline Vec2f uv(const size_t i)
- {
- const float u = vu[i];
- const float v = vv[i];
- return Vec2f(u,v);
- }
-
- __forceinline float t(const size_t i) { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- vfloat<M> absDen;
- Vec3vf<M> tri_Ng;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
-
- public:
- const vbool<M> flags;
- };
-
- template<int K>
- struct QuadHitK
- {
- __forceinline QuadHitK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& T,
- const vfloat<K>& absDen,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags)
- : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u0 = min(U * rcpAbsDen,1.0f);
- const vfloat<K> v0 = min(V * rcpAbsDen,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> u = select(flags,u1,u0);
- const vfloat<K> v = select(flags,v1,v0);
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
- };
-
- /* ----------------------------- */
- /* -- single ray intersectors -- */
- /* ----------------------------- */
-
-
- template<int M, bool filter>
- struct QuadMIntersector1MoellerTrumbore;
-
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMIntersector1MoellerTrumbore
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- epilog(hit.valid,hit);
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- epilog(hit.valid,hit);
- }
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- {
- if (epilog(hit.valid,hit))
- return true;
- }
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- if (epilog(hit.valid,hit))
- return true;
- }
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<bool filter>
- struct QuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
-
- MoellerTrumboreHitM<16> hit;
- MoellerTrumboreIntersector1<16> intersector(ray,nullptr);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen;
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- if (likely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct QuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- MoellerTrumboreHitM<8> hit;
- MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
-
- struct MoellerTrumboreIntersector1KTriangleM
- {
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate denominator */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags);
- return epilog(valid,hit);
- }
-
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1);
- return intersect(ray,k,v0,e1,e2,Ng,flags,epilog);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKMoellerTrumboreBase
- {
- __forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray.org;
- const Vec3vf<K> R = cross(C,ray.dir);
- const vfloat<K> den = dot(tri_Ng,ray.dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
- return none(valid0);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog);
- MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
- return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
- return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h
deleted file mode 100644
index 7ca3aed0a0..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h
+++ /dev/null
@@ -1,529 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quad_intersector_moeller.h"
-
-/*! Modified Pluecker ray/triangle intersector. The test first shifts
- * the ray origin into the origin of the coordinate system and then
- * uses Pluecker coordinates for the intersection. Due to the shift,
- * the Pluecker coordinate calculation simplifies and the tests get
- * numerically stable. The edge equations are watertight along the
- * edge for neighboring triangles. */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct QuadHitPlueckerM
- {
- __forceinline QuadHitPlueckerM() {}
-
- __forceinline QuadHitPlueckerM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& UVW,
- const vfloat<M>& t,
- const Vec3vf<M>& Ng,
- const vbool<M>& flags)
- : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {}
-
- __forceinline void finalize()
- {
- const vbool<M> invalid = abs(UVW) < min_rcp_input;
- const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
- const vfloat<M> u = min(U * rcpUVW,1.0f);
- const vfloat<M> v = min(V * rcpUVW,1.0f);
- const vfloat<M> u1 = vfloat<M>(1.0f) - u;
- const vfloat<M> v1 = vfloat<M>(1.0f) - v;
-#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
- vu = select(flags,u1,u);
- vv = select(flags,v1,v);
- vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-#else
- const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
- vv = select(flags,u1,v);
- vu = select(flags,v1,u);
- vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
-#endif
- }
-
- __forceinline Vec2f uv(const size_t i)
- {
- const float u = vu[i];
- const float v = vv[i];
- return Vec2f(u,v);
- }
-
- __forceinline float t(const size_t i) { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> UVW;
- Vec3vf<M> tri_Ng;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
-
- public:
- const vbool<M> flags;
- };
-
- template<int K>
- struct QuadHitPlueckerK
- {
- __forceinline QuadHitPlueckerK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& UVW,
- const vfloat<K>& t,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags)
- : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vbool<K> invalid = abs(UVW) < min_rcp_input;
- const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
- const vfloat<K> u0 = min(U * rcpUVW,1.0f);
- const vfloat<K> v0 = min(V * rcpUVW,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> u = select(flags,u1,u0);
- const vfloat<K> v = select(flags,v1,v0);
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> UVW;
- const vfloat<K> t;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
- };
-
- struct PlueckerIntersectorTriangle1
- {
- template<int M, typename Epilog>
- static __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
- };
-
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMIntersector1Pluecker
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
- PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog);
- PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true),epilog);
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
- if (PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<bool filter>
- struct QuadMIntersector1Pluecker<4,filter>
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
- return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct QuadMIntersector1Pluecker<4,filter>
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
- return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- struct PlueckerIntersector1KTriangleM
- {
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKPlueckerBase
- {
- __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
- return none(valid0);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog);
- PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<int K, bool filter>
- struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
-
- const vbool16 flags(0xf0f0);
- return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
- const vbool8 flags(0,0,0,0,1,1,1,1);
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi.h b/thirdparty/embree-aarch64/kernels/geometry/quadi.h
deleted file mode 100644
index 741ec519ab..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadi.h
+++ /dev/null
@@ -1,483 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- /* Stores M quads from an indexed face set */
- template <int M>
- struct QuadMi
- {
- /* Virtual interface to query information about the quad type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored quads */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline QuadMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline QuadMi(const vuint<M>& v0,
- const vuint<M>& v1,
- const vuint<M>& v2,
- const vuint<M>& v3,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
-#if defined(EMBREE_COMPACT_POLYS)
- : geomIDs(geomIDs), primIDs(primIDs) {}
-#else
- : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {}
-#endif
-
- /* Returns a mask that tells which quads are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns if the specified quad is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored quads */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the quads */
- __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++) {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- bounds.extend(mesh->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) {
- return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill quad from quad list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> geomID = -1, primID = -1;
- const PrimRefT* prim = &prims[begin];
- vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
-#if !defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID());
- const QuadMesh::Quad& q = mesh->quad(prim->primID());
- unsigned int_stride = mesh->vertices0.getStride()/4;
- v0[i] = q.v[0] * int_stride;
- v1[i] = q.v[1] * int_stride;
- v2[i] = q.v[2] * int_stride;
- v3[i] = q.v[3] * int_stride;
-#endif
- begin++;
- } else {
- assert(i);
- if (likely(i > 0)) {
- geomID[i] = geomID[0]; // always valid geomIDs
- primID[i] = -1; // indicates invalid data
- v0[i] = v0[0];
- v1[i] = v0[0];
- v2[i] = v0[0];
- v3[i] = v0[0];
- }
- }
- if (begin<end) prim = &prims[begin];
- }
- new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) {
- return cout << "QuadMi<" << M << ">( "
-#if !defined(EMBREE_COMPACT_POLYS)
- << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", "
-#endif
- << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )";
- }
-
- protected:
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M> v0_; // 4 byte offset of 1st vertex
- vuint<M> v1_; // 4 byte offset of 2nd vertex
- vuint<M> v2_; // 4 byte offset of 3rd vertex
- vuint<M> v3_; // 4 byte offset of 4th vertex
-#endif
- vuint<M> geomIDs; // geometry ID of mesh
- vuint<M> primIDs; // primitive ID of primitive inside mesh
- };
-
- namespace isa
- {
-
- template<int M>
- struct QuadMi : public embree::QuadMi<M>
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- using embree::QuadMi<M>::v0_;
- using embree::QuadMi<M>::v1_;
- using embree::QuadMi<M>::v2_;
- using embree::QuadMi<M>::v3_;
-#endif
- using embree::QuadMi<M>::geomIDs;
- using embree::QuadMi<M>::primIDs;
- using embree::QuadMi<M>::geomID;
- using embree::QuadMi<M>::primID;
- using embree::QuadMi<M>::valid;
-
- template<int vid>
- __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- return (Vec3f) mesh->vertices[0][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices = scene->vertices[geomID(index)];
- return (Vec3f&) vertices[v[index]];
-#endif
- }
-
- template<int vid, typename T>
- __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- const Vec3<T> p0(v0.x,v0.y,v0.z);
- const Vec3<T> p1(v1.x,v1.y,v1.z);
- return lerp(p0,p1,ftime);
- }
-
- template<int vid, int K, typename T>
- __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
- {
- Vec3<T> p0, p1;
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
-
- for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
- p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
- }
- return (T(one)-ftime)*p0 + ftime*p1;
- }
-
- struct Quad {
- vfloat4 v0,v1,v2,v3;
- };
-
-#if defined(EMBREE_COMPACT_POLYS)
-
- __forceinline Quad loadQuad(const int i, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero, zero };
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const QuadMesh::Quad& quad = mesh->quad(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]];
- const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]];
- return { v0, v1, v2, v3 };
- }
-
- __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero, zero };
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const QuadMesh::Quad& quad = mesh->quad(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]];
- const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]];
- return { v0, v1, v2, v3 };
- }
-
-#else
-
- __forceinline Quad loadQuad(const int i, const Scene* const scene) const
- {
- const float* vertices = scene->vertices[geomID(i)];
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
- return { v0, v1, v2, v3 };
- }
-
- __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const float* vertices = (const float*) mesh->vertexPtr(0,itime);
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
- return { v0, v1, v2, v3 };
- }
-
-#endif
-
- /* Gather the quads */
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const Scene *const scene) const;
-
-#if defined(__AVX512F__)
- __forceinline void gather(Vec3vf16& p0,
- Vec3vf16& p1,
- Vec3vf16& p2,
- Vec3vf16& p3,
- const Scene *const scene) const;
-#endif
-
- template<int K>
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
- __noinline
-#else
- __forceinline
-#endif
- void gather(const vbool<K>& valid,
- Vec3vf<K>& p0,
- Vec3vf<K>& p1,
- Vec3vf<K>& p2,
- Vec3vf<K>& p3,
- const size_t index,
- const Scene* const scene,
- const vfloat<K>& time) const
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(time, ftime);
-
- const size_t first = bsf(movemask(valid));
- if (likely(all(valid,itime[first] == itime)))
- {
- p0 = getVertex<0>(index, scene, itime[first], ftime);
- p1 = getVertex<1>(index, scene, itime[first], ftime);
- p2 = getVertex<2>(index, scene, itime[first], ftime);
- p3 = getVertex<3>(index, scene, itime[first], ftime);
- }
- else
- {
- p0 = getVertex<0>(valid, index, scene, itime, ftime);
- p1 = getVertex<1>(valid, index, scene, itime, ftime);
- p2 = getVertex<2>(valid, index, scene, itime, ftime);
- p3 = getVertex<3>(valid, index, scene, itime, ftime);
- }
- }
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const QuadMesh* mesh,
- const Scene *const scene,
- const int itime) const;
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const Scene *const scene,
- const float time) const;
-
- /* Updates the primitive */
- __forceinline BBox3fa update(QuadMesh* mesh)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M; i++)
- {
- if (!valid(i)) break;
- const unsigned primId = primID(i);
- const QuadMesh::Quad& q = mesh->quad(primId);
- const Vec3fa p0 = mesh->vertex(q.v[0]);
- const Vec3fa p1 = mesh->vertex(q.v[1]);
- const Vec3fa p2 = mesh->vertex(q.v[2]);
- const Vec3fa p3 = mesh->vertex(q.v[3]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
- }
- return bounds;
- }
-
- private:
-#if !defined(EMBREE_COMPACT_POLYS)
- template<int N> const vuint<M>& getVertexOffset() const;
-#endif
- };
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; }
-#endif
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene) const
- {
- prefetchL1(((char*)this)+0*64);
- prefetchL1(((char*)this)+1*64);
- const Quad tri0 = loadQuad(0,scene);
- const Quad tri1 = loadQuad(1,scene);
- const Quad tri2 = loadQuad(2,scene);
- const Quad tri3 = loadQuad(3,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
- }
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const QuadMesh* mesh,
- const Scene *const scene,
- const int itime) const
- {
- // FIXME: for trianglei there all geometries are identical, is this the case here too?
-
- const Quad tri0 = loadQuad(0,itime,scene);
- const Quad tri1 = loadQuad(1,itime,scene);
- const Quad tri2 = loadQuad(2,itime,scene);
- const Quad tri3 = loadQuad(3,itime,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
- }
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene,
- const float time) const
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical
-
- float ftime;
- const int itime = mesh->timeSegment(time, ftime);
-
- Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime);
- Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- p2 = lerp(a2,b2,vfloat4(ftime));
- p3 = lerp(a3,b3,vfloat4(ftime));
- }
- }
-
- template<int M>
- typename QuadMi<M>::Type QuadMi<M>::type;
-
- typedef QuadMi<4> Quad4i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h
deleted file mode 100644
index 96cf7f1ca2..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h
+++ /dev/null
@@ -1,350 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadi.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMiIntersector1Moeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
- /*! Intersect a ray with the M quads and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of M quads. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
- }
- };
-
- /*! Intersects M triangles with K rays. */
- template<int M, int K, bool filter>
- struct QuadMiIntersectorKMoeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMiIntersector1Pluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
- /*! Intersect a ray with the M quads and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of M quads. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
- }
- };
-
- /*! Intersects M triangles with K rays. */
- template<int M, int K, bool filter>
- struct QuadMiIntersectorKPluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M motion blur quads with 1 ray */
- template<int M, bool filter>
- struct QuadMiMBIntersector1Moeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
- /*! Intersect a ray with the M quads and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of M quads. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
- }
- };
-
-    /*! Packet intersector (K rays) for a block of M motion blur quads
-     *  using the Moeller-Trumbore precalculations. Vertices are gathered
-     *  from the scene for the ray time(s) before each test. */
-    template<int M, int K, bool filter>
-    struct QuadMiMBIntersectorKMoeller
-    {
-      typedef QuadMi<M> Primitive;
-      typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
-      /*! Intersects the K rays with each valid quad of the block, one quad at a time. */
-      static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        /* valid quads are stored first, so the loop ends at the first invalid slot */
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(normal.trav_prims,1,popcnt(valid_i),K);
-          Vec3vf<K> p0;
-          Vec3vf<K> p1;
-          Vec3vf<K> p2;
-          Vec3vf<K> p3;
-          quad.gather(valid_i,p0,p1,p2,p3,slot,context->scene,ray.time());
-          pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),slot));
-        }
-      }
-
-      /*! Returns, per ray, whether it is occluded by any of the M quads. */
-      static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        /* mask of rays that are still unoccluded; updated by the epilog */
-        vbool<K> active = valid_i;
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(shadow.trav_prims,1,popcnt(active),K);
-          Vec3vf<K> p0;
-          Vec3vf<K> p1;
-          Vec3vf<K> p2;
-          Vec3vf<K> p3;
-          quad.gather(valid_i,p0,p1,p2,p3,slot,context->scene,ray.time());
-          if (pre.intersectK(active,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(active,ray,context,quad.geomID(),quad.primID(),slot)))
-            break;
-        }
-        return !active;
-      }
-
-      /*! Intersects ray k of the packet with the M quads and updates the hit. */
-      static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
-        pre.intersect1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if ray k of the packet is occluded by one of the M quads. */
-      static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
-        return pre.occluded1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-    };
-
-    /*! Single-ray intersector for a block of M motion blur quads using
-     *  the Pluecker precalculations. The quad vertices are gathered from
-     *  the scene for the time of the ray before each test. */
-    template<int M, bool filter>
-    struct QuadMiMBIntersector1Pluecker
-    {
-      typedef QuadMi<M> Primitive;
-      typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
-      /*! Intersects the ray with the M quads and records the hit in ray. */
-      static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time());
-        pre.intersect(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if any of the M quads occludes the ray. */
-      static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time());
-        return pre.occluded(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-
-      /*! Forwards the point query to the generic per-primitive implementation. */
-      static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
-      {
-        typedef PrimitivePointQuery1<Primitive> Query;
-        return Query::pointQuery(query, context, quad);
-      }
-    };
-
-    /*! Packet intersector (K rays) for a block of M motion blur quads
-     *  using the Pluecker precalculations. Vertices are gathered from the
-     *  scene for the ray time(s) before each test. */
-    template<int M, int K, bool filter>
-    struct QuadMiMBIntersectorKPluecker
-    {
-      typedef QuadMi<M> Primitive;
-      typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
-      /*! Intersects the K rays with each valid quad of the block, one quad at a time. */
-      static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        /* valid quads are stored first, so the loop ends at the first invalid slot */
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(normal.trav_prims,1,popcnt(valid_i),K);
-          Vec3vf<K> p0;
-          Vec3vf<K> p1;
-          Vec3vf<K> p2;
-          Vec3vf<K> p3;
-          quad.gather(valid_i,p0,p1,p2,p3,slot,context->scene,ray.time());
-          pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),slot));
-        }
-      }
-
-      /*! Returns, per ray, whether it is occluded by any of the M quads. */
-      static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        /* mask of rays that are still unoccluded; updated by the epilog */
-        vbool<K> active = valid_i;
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(shadow.trav_prims,1,popcnt(active),K);
-          Vec3vf<K> p0;
-          Vec3vf<K> p1;
-          Vec3vf<K> p2;
-          Vec3vf<K> p3;
-          quad.gather(valid_i,p0,p1,p2,p3,slot,context->scene,ray.time());
-          if (pre.intersectK(active,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(active,ray,context,quad.geomID(),quad.primID(),slot)))
-            break;
-        }
-        return !active;
-      }
-
-      /*! Intersects ray k of the packet with the M quads and updates the hit. */
-      static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
-        pre.intersect1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if ray k of the packet is occluded by one of the M quads. */
-      static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        Vec3vf<M> p0;
-        Vec3vf<M> p1;
-        Vec3vf<M> p2;
-        Vec3vf<M> p3;
-        quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
-        return pre.occluded1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
-      }
-    };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv.h b/thirdparty/embree-aarch64/kernels/geometry/quadv.h
deleted file mode 100644
index 0a1fe4d128..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadv.h
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
-  /* Stores the vertices of M quads in struct of array layout.
-   * Unused slots are marked by a geometry ID of -1. */
-  template <int M>
-  struct QuadMv
-  {
-  public:
-    /* Primitive type descriptor for this block type */
-    struct Type : public PrimitiveType
-    {
-      const char* name() const;
-      size_t sizeActive(const char* This) const;
-      size_t sizeTotal(const char* This) const;
-      size_t getBytes(const char* This) const;
-    };
-    static Type type;
-
-  public:
-
-    /* Returns maximum number of stored quads */
-    static __forceinline size_t max_size() { return M; }
-
-    /* Returns required number of primitive blocks for N primitives (N/M rounded up) */
-    static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
-  public:
-
-    /* Default constructor */
-    __forceinline QuadMv() {}
-
-    /* Construction from vertices and IDs */
-    __forceinline QuadMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomIDs, const vuint<M>& primIDs)
-      : v0(v0), v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs) {}
-
-    /* Returns a mask that tells which quads are valid */
-    __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
-    /* Returns true if the specified quad is valid */
-    __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
-    /* Returns the number of stored quads (index of the first invalid slot) */
-    __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
-    /* Returns the geometry IDs */
-    __forceinline vuint<M>& geomID() { return geomIDs; }
-    __forceinline const vuint<M>& geomID() const { return geomIDs; }
-    __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
-    /* Returns the primitive IDs */
-    __forceinline vuint<M> primID() { return primIDs; }
-    __forceinline const vuint<M> primID() const { return primIDs; }
-    __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
-    /* Calculate the bounds of the quads */
-    __forceinline BBox3fa bounds() const
-    {
-      Vec3vf<M> lower = min(v0,v1,v2,v3);
-      Vec3vf<M> upper = max(v0,v1,v2,v3);
-
-      /* neutralize invalid slots so that they do not affect the reduction */
-      vbool<M> mask = valid();
-      lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
-      lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
-      lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
-      upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
-      upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
-      upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
-      return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
-                     Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
-    }
-
-    /* Non temporal store */
-    __forceinline static void store_nt(QuadMv* dst, const QuadMv& src)
-    {
-      vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
-      vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
-      vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
-      vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
-      vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
-      vfloat<M>::store_nt(&dst->v1.z,src.v1.z);
-      vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
-      vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
-      vfloat<M>::store_nt(&dst->v2.z,src.v2.z);
-      vfloat<M>::store_nt(&dst->v3.x,src.v3.x);
-      vfloat<M>::store_nt(&dst->v3.y,src.v3.y);
-      vfloat<M>::store_nt(&dst->v3.z,src.v3.z);
-      vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
-      vuint<M>::store_nt(&dst->primIDs,src.primIDs);
-    }
-
-    /* Fill quad from quad list: consumes up to M primitive references
-     * starting at prims[begin] and advances begin accordingly. Slots that
-     * are not filled keep ID -1 and zero vertices. */
-    __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
-    {
-      vuint<M> vgeomID = -1, vprimID = -1;
-      Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
-
-      for (size_t i=0; i<M && begin<end; i++, begin++)
-      {
-        const PrimRef& prim = prims[begin];
-        const unsigned geomID = prim.geomID();
-        const unsigned primID = prim.primID();
-        const QuadMesh* __restrict__ const mesh = scene->get<QuadMesh>(geomID);
-        const QuadMesh::Quad& quad = mesh->quad(primID);
-        const Vec3fa& p0 = mesh->vertex(quad.v[0]);
-        const Vec3fa& p1 = mesh->vertex(quad.v[1]);
-        const Vec3fa& p2 = mesh->vertex(quad.v[2]);
-        const Vec3fa& p3 = mesh->vertex(quad.v[3]);
-        vgeomID [i] = geomID;
-        vprimID [i] = primID;
-        v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
-        v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
-        v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
-        v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
-      }
-      QuadMv::store_nt(this,QuadMv(v0,v1,v2,v3,vgeomID,vprimID));
-    }
-
-    /* Updates the primitive: re-reads the vertices of all stored quads
-     * from the mesh, rebuilds this block in place and returns the merged
-     * bounds of the re-read quads. */
-    __forceinline BBox3fa update(QuadMesh* mesh)
-    {
-      BBox3fa bounds = empty;
-      vuint<M> vgeomID = -1, vprimID = -1;
-
-      /* Stage all four vertex arrays in locals. v3 must be a local too:
-       * otherwise the loop would write into the live member and the
-       * placement new below would be handed a reference into the very
-       * object it is overwriting. */
-      Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
-
-      for (size_t i=0; i<M; i++)
-      {
-        /* stored quads are contiguous, stop at the first unused slot */
-        if (primID(i) == -1) break;
-        const unsigned geomId = geomID(i);
-        const unsigned primId = primID(i);
-        const QuadMesh::Quad& quad = mesh->quad(primId);
-        const Vec3fa p0 = mesh->vertex(quad.v[0]);
-        const Vec3fa p1 = mesh->vertex(quad.v[1]);
-        const Vec3fa p2 = mesh->vertex(quad.v[2]);
-        const Vec3fa p3 = mesh->vertex(quad.v[3]);
-        bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
-        vgeomID [i] = geomId;
-        vprimID [i] = primId;
-        v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
-        v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
-        v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
-        v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
-      }
-      new (this) QuadMv(v0,v1,v2,v3,vgeomID,vprimID);
-      return bounds;
-    }
-
-  public:
-    Vec3vf<M> v0;      // 1st vertex of the quads
-    Vec3vf<M> v1;      // 2nd vertex of the quads
-    Vec3vf<M> v2;      // 3rd vertex of the quads
-    Vec3vf<M> v3;      // 4th vertex of the quads
-  private:
-    vuint<M> geomIDs; // geometry ID
-    vuint<M> primIDs; // primitive ID
-  };
-
- /* Out-of-class definition of the static primitive type descriptor
-  * declared as QuadMv<M>::type. */
- template<int M>
- typename QuadMv<M>::Type QuadMv<M>::type;
-
- /* Quad block holding up to 4 quads */
- typedef QuadMv<4> Quad4v;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h
deleted file mode 100644
index 30a24b291a..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadv.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
-    /*! Single-ray intersector for a block of M quads with pre-gathered
-     *  vertices, using the Moeller-Trumbore precalculations. */
-    template<int M, bool filter>
-    struct QuadMvIntersector1Moeller
-    {
-      typedef QuadMv<M> Primitive;
-      typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
-      /*! Intersects the ray with the M quads and records the hit in ray. */
-      static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        pre.intersect(ray,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if any of the M quads occludes the ray. */
-      static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        return pre.occluded(ray,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Forwards the point query to the generic per-primitive implementation. */
-      static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
-      {
-        typedef PrimitivePointQuery1<Primitive> Query;
-        return Query::pointQuery(query, context, quad);
-      }
-    };
-
-    /*! Packet intersector (K rays) for a block of M quads with
-     *  pre-gathered vertices, using the Moeller-Trumbore precalculations. */
-    template<int M, int K, bool filter>
-    struct QuadMvIntersectorKMoeller
-    {
-      typedef QuadMv<M> Primitive;
-      typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
-      /*! Intersects the K rays with each valid quad of the block, one quad at a time. */
-      static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        /* valid quads are stored first, so the loop ends at the first invalid slot */
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(normal.trav_prims,1,popcnt(valid_i),K);
-          /* replicate the vertices of quad 'slot' across all K lanes */
-          const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,slot);
-          const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,slot);
-          const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,slot);
-          const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,slot);
-          pre.intersectK(valid_i,ray,a,b,c,d,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),slot));
-        }
-      }
-
-      /*! Returns, per ray, whether it is occluded by any of the M quads. */
-      static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        /* mask of rays that are still unoccluded; updated by the epilog */
-        vbool<K> active = valid_i;
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(shadow.trav_prims,1,popcnt(active),K);
-          const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,slot);
-          const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,slot);
-          const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,slot);
-          const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,slot);
-          if (pre.intersectK(active,ray,a,b,c,d,OccludedKEpilogM<M,K,filter>(active,ray,context,quad.geomID(),quad.primID(),slot)))
-            break;
-        }
-        return !active;
-      }
-
-      /*! Intersects ray k of the packet with the M quads and updates the hit. */
-      static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        pre.intersect1(ray,k,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if ray k of the packet is occluded by one of the M quads. */
-      static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        return pre.occluded1(ray,k,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-    };
-
-    /*! Single-ray intersector for a block of M quads with pre-gathered
-     *  vertices, using the Pluecker precalculations. */
-    template<int M, bool filter>
-    struct QuadMvIntersector1Pluecker
-    {
-      typedef QuadMv<M> Primitive;
-      typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
-      /*! Intersects the ray with the M quads and records the hit in ray. */
-      static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        pre.intersect(ray,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if any of the M quads occludes the ray. */
-      static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        return pre.occluded(ray,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Forwards the point query to the generic per-primitive implementation. */
-      static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
-      {
-        typedef PrimitivePointQuery1<Primitive> Query;
-        return Query::pointQuery(query, context, quad);
-      }
-    };
-
-    /*! Packet intersector (K rays) for a block of M quads with
-     *  pre-gathered vertices, using the Pluecker precalculations. */
-    template<int M, int K, bool filter>
-    struct QuadMvIntersectorKPluecker
-    {
-      typedef QuadMv<M> Primitive;
-      typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
-      /*! Intersects the K rays with each valid quad of the block, one quad at a time. */
-      static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        /* valid quads are stored first, so the loop ends at the first invalid slot */
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(normal.trav_prims,1,popcnt(valid_i),K);
-          /* replicate the vertices of quad 'slot' across all K lanes */
-          const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,slot);
-          const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,slot);
-          const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,slot);
-          const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,slot);
-          pre.intersectK(valid_i,ray,a,b,c,d,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),slot));
-        }
-      }
-
-      /*! Returns, per ray, whether it is occluded by any of the M quads. */
-      static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        /* mask of rays that are still unoccluded; updated by the epilog */
-        vbool<K> active = valid_i;
-        for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
-        {
-          STAT3(shadow.trav_prims,1,popcnt(active),K);
-          const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,slot);
-          const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,slot);
-          const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,slot);
-          const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,slot);
-          if (pre.intersectK(active,ray,a,b,c,d,OccludedKEpilogM<M,K,filter>(active,ray,context,quad.geomID(),quad.primID(),slot)))
-            break;
-        }
-        return !active;
-      }
-
-      /*! Intersects ray k of the packet with the M quads and updates the hit. */
-      static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        STAT3(normal.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        pre.intersect1(ray,k,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-
-      /*! Returns true if ray k of the packet is occluded by one of the M quads. */
-      static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
-      {
-        STAT3(shadow.trav_prims,1,1,1);
-        const Vec3vf<M>& a = quad.v0;
-        const Vec3vf<M>& b = quad.v1;
-        const Vec3vf<M>& c = quad.v2;
-        const Vec3vf<M>& d = quad.v3;
-        return pre.occluded1(ray,k,context,a,b,c,d,quad.geomID(),quad.primID());
-      }
-    };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h
deleted file mode 100644
index cdf68f486b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h
+++ /dev/null
@@ -1,710 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-
-/*
-
- This file implements the intersection of a ray with a round linear
- curve segment. We define the geometry of such a round linear curve
- segment from point p0 with radius r0 to point p1 with radius r1
- using the cone that touches spheres p0/r0 and p1/r1 tangentially
- plus the sphere p1/r1. We denote the tangentially touching cone from
- p0/r0 to p1/r1 with cone(p0,r0,p1,r1) and the cone plus the ending
- sphere with cone_sphere(p0,r0,p1,r1).
-
- For multiple connected round linear curve segments this construction
- yields a proper shape when viewed from the outside. Using the
- following CSG we can also handle the interior in most common cases:
-
- round_linear_curve(pl,rl,p0,r0,p1,r1,pr,rr) =
- cone_sphere(p0,r0,p1,r1) - cone(pl,rl,p0,r0) - cone(p1,r1,pr,rr)
-
- Thus by subtracting the neighboring cone geometries, we cut away
- parts of the center cone_sphere surface which lie inside the
- combined curve. This approach works as long as geometry of the
- current cone_sphere penetrates into direct neighbor segments only,
- and not into segments further away.
-
- To construct a cone that touches two spheres at p0 and p1 with r0
- and r1, one has to increase the cone radius at r0 and r1 to obtain
- larger radii w0 and w1, such that the infinite cone properly touches
- the spheres. From the paper "Ray Tracing Generalized Tube
- Primitives: Method and Applications"
- (https://www.researchgate.net/publication/334378683_Ray_Tracing_Generalized_Tube_Primitives_Method_and_Applications)
- one can derive the following equations for these increased
- radii:
-
- sr = 1.0f / sqrt(1-sqr(dr)/sqr(p1-p0))
- w0 = sr*r0
- w1 = sr*r1
-
- Further, we want the cone to start where it touches the sphere at p0
- and to end where it touches sphere at p1. Therefore, we need to
- construct clipping locations y0 and y1 for the start and end of the
- cone. These start and end clipping location of the cone can get
- calculated as:
-
- Y0 = - r0 * (r1-r0) / length(p1-p0)
- Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
-
- Here the cone starts at a distance Y0 and ends at a distance Y1 from
- point p0, measured along the cone center line. The distance
- dY = Y1-Y0 can be calculated as:
-
- dY = length(p1-p0) - (r1-r0)^2 / length(p1-p0)
-
- In the code below, Y will always be scaled by length(p1-p0) to
- obtain y and you will find the terms r0*(r1-r0) and
- (p1-p0)^2-(r1-r0)^2.
-
- */
-
-namespace embree
-{
- namespace isa
- {
-    /*! Hit record of the round line intersector for M lanes: stores the
-     *  u/v coordinates, the hit distance t and the geometry normal Ng,
-     *  and gives per-lane access to them. */
-    template<int M>
-    struct RoundLineIntersectorHitM
-    {
-      /*! Default constructor; does not assign any member explicitly. */
-      __forceinline RoundLineIntersectorHitM()
-      {
-      }
-
-      /*! Stores the given u, v, t and Ng vectors. */
-      __forceinline RoundLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
-        : vu(u), vv(v), vt(t), vNg(Ng)
-      {
-      }
-
-      /*! Nothing to do: all hit values are stored in final form. */
-      __forceinline void finalize()
-      {
-      }
-
-      /*! Returns the (u,v) coordinates of lane i. */
-      __forceinline Vec2f uv(const size_t i) const
-      {
-        const float lane_u = vu[i];
-        const float lane_v = vv[i];
-        return Vec2f(lane_u,lane_v);
-      }
-
-      /*! Returns the hit distance of lane i. */
-      __forceinline float t(const size_t i) const
-      {
-        return vt[i];
-      }
-
-      /*! Returns the geometry normal of lane i. */
-      __forceinline Vec3fa Ng(const size_t i) const
-      {
-        const float nx = vNg.x[i];
-        const float ny = vNg.y[i];
-        const float nz = vNg.z[i];
-        return Vec3fa(nx,ny,nz);
-      }
-
-    public:
-      vfloat<M> vu;
-      vfloat<M> vv;
-      vfloat<M> vt;
-      Vec3vf<M> vNg;
-    };
-
- namespace __roundline_internal
- {
- /* Geometry of M cones, each given by two points with radius
-  * (a = p0/r0 in xyz/w, b = p1/r1 in xyz/w), together with the derived
-  * terms that the clipping and inside tests below reuse. */
- template<int M>
- struct ConeGeometry
- {
- /* Precomputes dP = p1-p0, dPdP = dP*dP, dr = r1-r0, drdr = dr^2,
-  * r0dr = r0*dr and g = dPdP - drdr from the two end points. */
- ConeGeometry (const Vec4vf<M>& a, const Vec4vf<M>& b)
- : p0(a.xyz()), p1(b.xyz()), dP(p1-p0), dPdP(dot(dP,dP)), r0(a.w), sqr_r0(sqr(r0)), r1(b.w), dr(r1-r0), drdr(dr*dr), r0dr (r0*dr), g(dPdP - drdr) {}
-
- /*
-
- This function tests if a point is accepted by first cone
- clipping plane.
-
- First, we need to project the point onto the line p0->p1:
-
- Y = (p-p0)*(p1-p0)/length(p1-p0)
-
- This value Y is the distance to the projection point from
- p0. The clip distances are calculated as:
-
- Y0 = - r0 * (r1-r0) / length(p1-p0)
- Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
-
- Thus to test if the point p is accepted by the first
- clipping plane we need to test Y > Y0 and to test if it
- is accepted by the second clipping plane we need to test
- Y < Y1.
-
- By multiplying the calculations with length(p1-p0) these
- calculations can get simplified to:
-
- y = (p-p0)*(p1-p0)
- y0 = - r0 * (r1-r0)
- y1 = (p1-p0)^2 - r1 * (r1-r0)
-
- and the test y > y0 and y < y1.
-
- */
-
- __forceinline vbool<M> isClippedByPlane (const vbool<M>& valid_i, const Vec3vf<M>& p) const
- {
- const Vec3vf<M> p0p = p - p0;
- const vfloat<M> y = dot(p0p,dP);
- const vfloat<M> cap0 = -r0dr;
- const vbool<M> inside_cone = y > cap0;
- /* lanes whose p0.x or p1.x equals inf never report true
-  * (presumably inf marks a missing segment -- TODO confirm) */
- return valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)) & inside_cone;
- }
-
- /*
-
- This function tests whether a point lies inside the capped cone
- tangential to its ending spheres.
-
- Therefore one has to check if the point is inside the
- region defined by the cone clipping planes, which is
- performed similar as in the previous function.
-
- To perform the inside cone test we need to project the
- point onto the line p0->p1:
-
- dP = p1-p0
- Y = (p-p0)*dP/length(dP)
-
- This value Y is the distance to the projection point from
- p0. To obtain a parameter value u going from 0 to 1 along
- the line p0->p1 we calculate:
-
- U = Y/length(dP)
-
- The radii to use at points p0 and p1 are:
-
- w0 = sr * r0
- w1 = sr * r1
- dw = w1-w0
-
- Using these radii and u one can directly test if the point
- lies inside the cone using the formula py*py < wy*wy with:
-
- wy = w0 + u*dw
- py = p0 + u*dP - p
-
- By multiplying the calculations with length(p1-p0) and
- inserting the definition of w one can obtain simpler equations:
-
- y = (p-p0)*dP
- ry = r0 + y/dP^2 * dr
- wy = sr*ry
- py = p0 + y/dP^2*dP - p
- y0 = - r0 * dr
- y1 = dP^2 - r1 * dr
-
- Thus for the in-cone test we get:
-
- py^2 < wy^2
- <=> py^2 < sr^2 * ry^2
- <=> py^2 * ( dP^2 - dr^2 ) < dP^2 * ry^2
-
- This can further get simplified to:
-
- (p0-p)^2 * (dP^2 - dr^2) - y^2 < dP^2 * r0^2 + 2.0f*r0*dr*y;
-
- */
-
- __forceinline vbool<M> isInsideCappedCone (const vbool<M>& valid_i, const Vec3vf<M>& p) const
- {
- const Vec3vf<M> p0p = p - p0;
- const vfloat<M> y = dot(p0p,dP);
- /* y0 from the derivation above, moved up by ulp */
- const vfloat<M> cap0 = -r0dr+vfloat<M>(ulp);
- /* y1 from the derivation above */
- const vfloat<M> cap1 = -r1*dr + dPdP;
-
- vbool<M> inside_cone = valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf));
- inside_cone &= y > cap0; // start clipping plane
- inside_cone &= y < cap1; // end clipping plane
- inside_cone &= sqr(p0p)*g - sqr(y) < dPdP * sqr_r0 + 2.0f*r0dr*y; // in cone test
- return inside_cone;
- }
-
- protected:
- Vec3vf<M> p0; // start point (a.xyz())
- Vec3vf<M> p1; // end point (b.xyz())
- Vec3vf<M> dP; // p1-p0
- vfloat<M> dPdP; // dot(dP,dP)
- vfloat<M> r0; // radius at p0 (a.w)
- vfloat<M> sqr_r0; // r0^2
- vfloat<M> r1; // radius at p1 (b.w)
- vfloat<M> dr; // r1-r0
- vfloat<M> drdr; // dr^2
- vfloat<M> r0dr; // r0*dr
- vfloat<M> g; // dPdP - drdr
- };
-
- template<int M>
- struct ConeGeometryIntersector : public ConeGeometry<M>
- {
- using ConeGeometry<M>::p0;
- using ConeGeometry<M>::p1;
- using ConeGeometry<M>::dP;
- using ConeGeometry<M>::dPdP;
- using ConeGeometry<M>::r0;
- using ConeGeometry<M>::sqr_r0;
- using ConeGeometry<M>::r1;
- using ConeGeometry<M>::dr;
- using ConeGeometry<M>::r0dr;
- using ConeGeometry<M>::g;
-
- /* Builds the cone geometry from a and b and caches the ray terms
-  * used by the intersection routines: O = ray_org-p0 (ray origin
-  * relative to p0), dO = ray_dir, OdP = dP*O, dOdP = dP*dO and
-  * yp = OdP + r0*dr. dOdO and rcp_dOdO are supplied by the caller
-  * (presumably dot(ray_dir,ray_dir) and its reciprocal -- TODO confirm). */
- ConeGeometryIntersector (const Vec3vf<M>& ray_org, const Vec3vf<M>& ray_dir, const vfloat<M>& dOdO, const vfloat<M>& rcp_dOdO, const Vec4vf<M>& a, const Vec4vf<M>& b)
- : ConeGeometry<M>(a,b), org(ray_org), O(ray_org-p0), dO(ray_dir), dOdO(dOdO), rcp_dOdO(rcp_dOdO), OdP(dot(dP,O)), dOdP(dot(dP,dO)), yp(OdP + r0dr) {}
-
- /*
-
- This function intersects a ray with a cone that touches a
- start sphere p0/r0 and end sphere p1/r1.
-
- To find this ray/cone intersections one could just
- calculate radii w0 and w1 as described above and use a
- standard ray/cone intersection routine with these
- radii. However, it turns out that calculations can get
- simplified when deriving a specialized ray/cone
- intersection for this special case. We perform
- calculations relative to the cone origin p0 and define:
-
- O = ray_org - p0
- dO = ray_dir
- dP = p1-p0
- dr = r1-r0
- dw = w1-w0
-
- For some t we can compute the potential hit point h = O + t*dO and
- project it onto the cone vector dP to obtain u = (h*dP)/(dP*dP). In
- case of an intersection, the squared distance from the hit point
- projected onto the cone center line to the hit point should be equal
- to the squared cone radius at u:
-
- (u*dP - h)^2 = (w0 + u*dw)^2
-
- Inserting the definition of h, u, w0, and dw into this formula, then
- factoring out all terms, and sorting by t^2, t^1, and t^0 terms
- yields a quadratic equation to solve.
-
- Inserting u:
- ( (h*dP)*dP/dP^2 - h )^2 = ( w0 + (h*dP)*dw/dP^2 )^2
-
- Multiplying by dP^4:
- ( (h*dP)*dP - h*dP^2 )^2 = ( w0*dP^2 + (h*dP)*dw )^2
-
- Inserting w0 and dw:
- ( (h*dP)*dP - h*dP^2 )^2 = ( r0*dP^2 + (h*dP)*dr )^2 / (1-dr^2/dP^2)
- ( (h*dP)*dP - h*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (h*dP)*dr )^2
-
- Now one can insert the definition of h, factor out, and presort by t:
- ( ((O + t*dO)*dP)*dP - (O + t*dO)*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + ((O + t*dO)*dP)*dr )^2
- ( (O*dP)*dP-O*dP^2 + t*( (dO*dP)*dP - dO*dP^2 ) )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (O*dP)*dr + t*(dO*dP)*dr )^2
-
- Factoring out further and sorting by t^2, t^1 and t^0 yields:
-
- 0 = t^2 * [ ((dO*dP)*dP - dO*dP^2)^2 * (dP^2 - dr^2) - dP^2*(dO*dP)^2*dr^2 ]
- + 2*t^1 * [ ((O*dP)*dP - O*dP^2) * ((dO*dP)*dP - dO*dP^2) * (dP^2 - dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)*(dO*dP)*dr ]
- + t^0 * [ ( (O*dP)*dP - O*dP^2)^2 * (dP^2-dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)^2 ]
-
- This can be simplified to:
-
- 0 = t^2 * [ (dP^2 - dr^2)*dO^2 - (dO*dP)^2 ]
- + 2*t^1 * [ (dP^2 - dr^2)*(O*dO) - (dO*dP)*(O*dP + r0*dr) ]
- + t^0 * [ (dP^2 - dr^2)*O^2 - (O*dP)^2 - r0^2*dP^2 - 2.0f*r0*dr*(O*dP) ]
-
- Solving this quadratic equation yields the values for t at which the
- ray intersects the cone.
-
- */
-
- /* Solves the quadratic derived above for the ray/cone hits.
-  * valid: in/out lane mask, reduced to lanes that have real roots,
-  *        g > 0 and a non-degenerate quadratic.
-  * lower/upper: receive the front/back hit distance, or pos_inf /
-  *        neg_inf where that hit is rejected.
-  * Returns false if no lane is left in valid, true otherwise. Also
-  * stores t_cone_front/y_cone_front (and the back values unless
-  * EMBREE_BACKFACE_CULLING_CURVES is defined) in members. */
- __forceinline bool intersectCone(vbool<M>& valid, vfloat<M>& lower, vfloat<M>& upper)
- {
- /* return no hit by default */
- lower = pos_inf;
- upper = neg_inf;
-
- /* compute quadratic equation A*t^2 + B*t + C = 0 */
- const vfloat<M> OO = dot(O,O);
- const vfloat<M> OdO = dot(dO,O);
- const vfloat<M> A = g * dOdO - sqr(dOdP);
- const vfloat<M> B = 2.0f * (g*OdO - dOdP*yp);
- const vfloat<M> C = g*OO - sqr(OdP) - sqr_r0*dPdP - 2.0f*r0dr*OdP;
-
- /* we miss the cone if the discriminant is smaller than zero */
- const vfloat<M> D = B*B - 4.0f*A*C;
- valid &= (D >= 0.0f & g > 0.0f); // if g <= 0 then the cone is inside a sphere end
-
- /* When rays are parallel to the cone surface, then the
- * ray may be inside or outside the cone. We just assume a
- * miss in that case, which is fine as rays inside the
- * cone would anyway hit the ending spheres in that
- * case. */
- valid &= abs(A) > min_rcp_input;
- if (unlikely(none(valid))) {
- return false;
- }
-
- /* compute distance to front and back hit */
- const vfloat<M> Q = sqrt(D);
- const vfloat<M> rcp_2A = rcp(2.0f*A);
- t_cone_front = (-B-Q)*rcp_2A;
- /* y = h*dP + r0*dr for the hit point h; a hit is only accepted
-  * if y lies in (-ulp, g], i.e. between the two clipping planes */
- y_cone_front = yp + t_cone_front*dOdP;
- lower = select( (y_cone_front > -(float)ulp) & (y_cone_front <= g) & (g > 0.0f), t_cone_front, vfloat<M>(pos_inf));
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- t_cone_back = (-B+Q)*rcp_2A;
- y_cone_back = yp + t_cone_back *dOdP;
- upper = select( (y_cone_back > -(float)ulp) & (y_cone_back <= g) & (g > 0.0f), t_cone_back , vfloat<M>(neg_inf));
-#endif
- return true;
- }
-
- /*
- This function intersects the ray with the end sphere at
- p1. We already clip away hits that are inside the
- neighboring cone segment.
-
- */
-
- /* Intersects the ray with the end sphere p1/r1.
-  * valid: lane mask, only forwarded to coneR.isClippedByPlane.
-  * coneR: cone geometry whose first clipping plane removes hits.
-  * lower/upper: receive the front/back hit distance, or pos_inf /
-  *        neg_inf where that hit is rejected. With
-  *        EMBREE_BACKFACE_CULLING_CURVES upper is always neg_inf.
-  * NOTE(review): isClippedByPlane yields false for lanes that are not
-  * in valid, so the negated term is true there and such lanes are not
-  * masked out by this function itself -- confirm that callers mask. */
- __forceinline void intersectEndSphere(vbool<M>& valid,
- const ConeGeometry<M>& coneR,
- vfloat<M>& lower, vfloat<M>& upper)
- {
- /* calculate front and back hit with end sphere */
- const Vec3vf<M> O1 = org - p1;
- const vfloat<M> O1dO = dot(O1,dO);
- const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r1));
- /* h2 < 0 means the ray misses the sphere */
- const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) );
-
- /* clip away front hit if it is inside next cone segment; the hit
-  * must also lie beyond the end clipping plane of this cone (y > g) */
- t_sph1_front = (-O1dO - rhs1)*rcp_dOdO;
- const Vec3vf<M> hit_front = org + t_sph1_front*dO;
- vbool<M> valid_sph1_front = h2 >= 0.0f & yp + t_sph1_front*dOdP > g & !coneR.isClippedByPlane (valid, hit_front);
- lower = select(valid_sph1_front, t_sph1_front, vfloat<M>(pos_inf));
-
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- /* clip away back hit if it is inside next cone segment */
- t_sph1_back = (-O1dO + rhs1)*rcp_dOdO;
- const Vec3vf<M> hit_back = org + t_sph1_back*dO;
- vbool<M> valid_sph1_back = h2 >= 0.0f & yp + t_sph1_back*dOdP > g & !coneR.isClippedByPlane (valid, hit_back);
- upper = select(valid_sph1_back, t_sph1_back, vfloat<M>(neg_inf));
-#else
- upper = vfloat<M>(neg_inf);
-#endif
- }
-
- /* Intersects the ray with the begin sphere p0/r0.
-  * valid: lane mask; lanes outside it never report a hit.
-  * lower/upper: receive the front/back hit distance, or pos_inf /
-  *        neg_inf where that hit is rejected. With
-  *        EMBREE_BACKFACE_CULLING_CURVES upper is always neg_inf. */
- __forceinline void intersectBeginSphere(const vbool<M>& valid,
- vfloat<M>& lower, vfloat<M>& upper)
- {
- /* calculate front and back hit with begin sphere */
- const Vec3vf<M> O1 = org - p0;
- const vfloat<M> O1dO = dot(O1,dO);
- const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r0));
- /* h2 < 0 means the ray misses the sphere */
- const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) );
-
- /* keep the front hit only if it lies before the start clipping
-  * plane of the cone (yp + t*dOdP < 0) */
- t_sph0_front = (-O1dO - rhs1)*rcp_dOdO;
- vbool<M> valid_sph1_front = valid & h2 >= 0.0f & yp + t_sph0_front*dOdP < 0;
- lower = select(valid_sph1_front, t_sph0_front, vfloat<M>(pos_inf));
-
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- /* keep the back hit only if it lies before the start clipping
-  * plane of the cone */
- t_sph0_back = (-O1dO + rhs1)*rcp_dOdO;
- vbool<M> valid_sph1_back = valid & h2 >= 0.0f & yp + t_sph0_back*dOdP < 0;
- upper = select(valid_sph1_back, t_sph0_back, vfloat<M>(neg_inf));
-#else
- upper = vfloat<M>(neg_inf);
-#endif
- }
-
- /*
-
- This function calculates the geometry normal of some cone hit.
-
- For a given hit point h (relative to p0) with a cone
- starting at p0 with radius w0 and ending at p1 with
- radius w1 one normally calculates the geometry normal by
- first calculating the parmetric u hit location along the
- cone:
-
- u = dot(h,dP)/dP^2
-
- Using this value one can now directly calculate the
- geometry normal by bending the connection vector (h-u*dP)
- from hit to projected hit with some cone dependent value
- dw/sqrt(dP^2) * normalize(dP):
-
- Ng = normalize(h-u*dP) - dw/length(dP) * normalize(dP)
-
- The length of the vector (h-u*dP) can also get calculated
- by interpolating the radii as w0+u*dw which yields:
-
- Ng = (h-u*dP)/(w0+u*dw) - dw/dP^2 * dP
-
- Multiplying with (w0+u*dw) yield a scaled Ng':
-
- Ng' = (h-u*dP) - (w0+u*dw)*dw/dP^2*dP
-
- Inserting the definition of w0 and dw and refactoring
- yield a furhter scaled Ng'':
-
- Ng'' = (dP^2 - dr^2) (h-q) - (r0+u*dr)*dr*dP
-
- Now inserting the definition of u gives and multiplying
- with the denominator yields:
-
- Ng''' = (dP^2-dr^2)*(dP^2*h-dot(h,dP)*dP) - (dP^2*r0+dot(h,dP)*dr)*dr*dP
-
- Factoring out, cancelling terms, dividing by dP^2, and
- factoring again yields finally:
-
- Ng'''' = (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr)
-
- */
-
- __forceinline Vec3vf<M> Ng_cone(const vbool<M>& front_hit) const
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
- const vfloat<M> t = select(front_hit, t_cone_front, t_cone_back);
- const Vec3vf<M> h = O + t*dO;
- return g*h-dP*y;
-#else
- const Vec3vf<M> h = O + t_cone_front*dO;
- return g*h-dP*y_cone_front;
-#endif
- }
-
- /* compute geometry normal of sphere hit as the difference
- * vector from hit point to sphere center */
-
- __forceinline Vec3vf<M> Ng_sphere1(const vbool<M>& front_hit) const
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> t_sph1 = select(front_hit, t_sph1_front, t_sph1_back);
- return org+t_sph1*dO-p1;
-#else
- return org+t_sph1_front*dO-p1;
-#endif
- }
-
- __forceinline Vec3vf<M> Ng_sphere0(const vbool<M>& front_hit) const
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> t_sph0 = select(front_hit, t_sph0_front, t_sph0_back);
- return org+t_sph0*dO-p0;
-#else
- return org+t_sph0_front*dO-p0;
-#endif
- }
-
- /*
- This function calculates the u coordinate of a
- hit. Therefore we use the hit distance y (which is zero
- at the first cone clipping plane) and divide by distance
- g between the clipping planes.
-
- */
-
- __forceinline vfloat<M> u_cone(const vbool<M>& front_hit) const
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
- return clamp(y*rcp(g));
-#else
- return clamp(y_cone_front*rcp(g));
-#endif
- }
-
- private:
- Vec3vf<M> org;
- Vec3vf<M> O;
- Vec3vf<M> dO;
- vfloat<M> dOdO;
- vfloat<M> rcp_dOdO;
- vfloat<M> OdP;
- vfloat<M> dOdP;
-
- /* for ray/cone intersection */
- private:
- vfloat<M> yp;
- vfloat<M> y_cone_front;
- vfloat<M> t_cone_front;
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> y_cone_back;
- vfloat<M> t_cone_back;
-#endif
-
- /* for ray/sphere intersection */
- private:
- vfloat<M> t_sph1_front;
- vfloat<M> t_sph0_front;
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> t_sph1_back;
- vfloat<M> t_sph0_back;
-#endif
- };
-
-
- template<int M, typename Epilog, typename ray_tfar_func>
- static __forceinline bool intersectConeSphere(const vbool<M>& valid_i,
- const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
- const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar,
- const Vec4vf<M>& v0, const Vec4vf<M>& v1,
- const Vec4vf<M>& vL, const Vec4vf<M>& vR,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- /* move ray origin closer to make calculations numerically stable */
- const vfloat<M> dOdO = sqr(ray_dir);
- const vfloat<M> rcp_dOdO = rcp(dOdO);
- const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
- const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO;
- const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir;
-
- /* intersect with cone from v0 to v1 */
- vfloat<M> t_cone_lower, t_cone_upper;
- ConeGeometryIntersector<M> cone (ray_org, ray_dir, dOdO, rcp_dOdO, v0, v1);
- vbool<M> validCone = valid;
- cone.intersectCone(validCone, t_cone_lower, t_cone_upper);
-
- valid &= (validCone | (cone.g <= 0.0f)); // if cone is entirely in sphere end - check sphere
- if (unlikely(none(valid)))
- return false;
-
- /* cone hits inside the neighboring capped cones are inside the geometry and thus ignored */
- const ConeGeometry<M> coneL (v0, vL);
- const ConeGeometry<M> coneR (v1, vR);
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const Vec3vf<M> hit_lower = ray_org + t_cone_lower*ray_dir;
- const Vec3vf<M> hit_upper = ray_org + t_cone_upper*ray_dir;
- t_cone_lower = select (!coneL.isInsideCappedCone (validCone, hit_lower) & !coneR.isInsideCappedCone (validCone, hit_lower), t_cone_lower, vfloat<M>(pos_inf));
- t_cone_upper = select (!coneL.isInsideCappedCone (validCone, hit_upper) & !coneR.isInsideCappedCone (validCone, hit_upper), t_cone_upper, vfloat<M>(neg_inf));
-#endif
-
- /* intersect ending sphere */
- vfloat<M> t_sph1_lower, t_sph1_upper;
- vfloat<M> t_sph0_lower = vfloat<M>(pos_inf);
- vfloat<M> t_sph0_upper = vfloat<M>(neg_inf);
- cone.intersectEndSphere(valid, coneR, t_sph1_lower, t_sph1_upper);
-
- const vbool<M> isBeginPoint = valid & (vL[0] == vfloat<M>(pos_inf));
- if (unlikely(any(isBeginPoint))) {
- cone.intersectBeginSphere (isBeginPoint, t_sph0_lower, t_sph0_upper);
- }
-
- /* CSG union of cone and end sphere */
- vfloat<M> t_sph_lower = min(t_sph0_lower, t_sph1_lower);
- vfloat<M> t_cone_sphere_lower = min(t_cone_lower, t_sph_lower);
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> t_sph_upper = max(t_sph0_upper, t_sph1_upper);
- vfloat<M> t_cone_sphere_upper = max(t_cone_upper, t_sph_upper);
-
- /* filter out hits that are not in tnear/tfar range */
- const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
- const vbool<M> valid_upper = valid & ray_tnear <= dt+t_cone_sphere_upper & dt+t_cone_sphere_upper <= ray_tfar() & t_cone_sphere_upper != vfloat<M>(neg_inf);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_lower | valid_upper;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> t_first = select(valid_lower, t_cone_sphere_lower, t_cone_sphere_upper);
- const vbool<M> cone_hit_first = t_first == t_cone_lower | t_first == t_cone_upper;
- const vbool<M> sph0_hit_first = t_first == t_sph0_lower | t_first == t_sph0_upper;
- const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
- const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
- /* invoke intersection filter for first hit */
- RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first);
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_cone_sphere_upper;
- const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_cone_sphere_upper <= ray_tfar());
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper;
- const vbool<M> sph0_hit_second = t_second == t_sph0_lower | t_second == t_sph0_upper;
- const Vec3vf<M> Ng_second = select(cone_hit_second, cone.Ng_cone(false), select (sph0_hit_second, cone.Ng_sphere0(false), cone.Ng_sphere1(false)));
- const vfloat<M> u_second = select(cone_hit_second, cone.u_cone(false), select (sph0_hit_second, vfloat<M>(zero), vfloat<M>(one)));
-
- hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
-#else
- /* filter out hits that are not in tnear/tfar range */
- const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
-
- /* check if there is a valid hit */
- if (unlikely(none(valid_lower)))
- return false;
-
- /* construct first hit */
- const vbool<M> cone_hit_first = t_cone_sphere_lower == t_cone_lower | t_cone_sphere_lower == t_cone_upper;
- const vbool<M> sph0_hit_first = t_cone_sphere_lower == t_sph0_lower | t_cone_sphere_lower == t_sph0_upper;
- const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
- const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
- /* invoke intersection filter for first hit */
- RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_cone_sphere_lower,Ng_first);
- const bool is_hit_first = epilog(valid_lower, hit);
-
- return is_hit_first;
-#endif
- }
-
- } // end namespace __roundline_internal
-
- template<int M>
- struct RoundLinearCurveIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- struct ray_tfar {
- Ray& ray;
- __forceinline ray_tfar(Ray& ray) : ray(ray) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Vec4vf<M>& vLi, const Vec4vf<M>& vRi,
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const vfloat<M> ray_tnear(ray.tnear());
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi);
- const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi);
- return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,vL,vR,epilog);
- }
- };
-
- template<int M, int K>
- struct RoundLinearCurveIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- struct ray_tfar {
- RayK<K>& ray;
- size_t k;
- __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar[k]; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Vec4vf<M>& vLi, const Vec4vf<M>& vRi,
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> ray_tnear = ray.tnear()[k];
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi);
- const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi);
- return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h
deleted file mode 100644
index 079817335e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// ======================================================================== //
-// Copyright 2009-2020 Intel Corporation //
-// //
-// Licensed under the Apache License, Version 2.0 (the "License"); //
-// you may not use this file except in compliance with the License. //
-// You may obtain a copy of the License at //
-// //
-// http://www.apache.org/licenses/LICENSE-2.0 //
-// //
-// Unless required by applicable law or agreed to in writing, software //
-// distributed under the License is distributed on an "AS IS" BASIS, //
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
-// See the License for the specific language governing permissions and //
-// limitations under the License. //
-// ======================================================================== //
-
-#pragma once
-
-#include "roundline_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct RoundLinearCurveMiIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct RoundLinearCurveMiMBIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct RoundLinearCurveMiIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct RoundLinearCurveMiMBIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h
deleted file mode 100644
index 3ab90c29ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h
+++ /dev/null
@@ -1,183 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_points.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct SphereIntersectorHitM
- {
- __forceinline SphereIntersectorHitM() {}
-
- __forceinline SphereIntersectorHitM(const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vt(t), vNg(Ng) {}
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv(const size_t i) const {
- return Vec2f(0.0f, 0.0f);
- }
- __forceinline float t(const size_t i) const {
- return vt[i];
- }
- __forceinline Vec3fa Ng(const size_t i) const {
- return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]);
- }
-
- public:
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct SphereIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i, Ray& ray,
- const Precalculations& pre, const Vec4vf<M>& v0, const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const vfloat<M> rd2 = rcp(dot(ray.dir, ray.dir));
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- const vfloat<M> td = sqrt((r2 - l2) * rd2);
- const vfloat<M> t_front = projC0 - td;
- const vfloat<M> t_back = projC0 + td;
-
- const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar);
- const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_front | valid_back;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> td_front = -td;
- const vfloat<M> td_back = +td;
- const vfloat<M> t_first = select(valid_front, t_front, t_back);
- const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
- SphereIntersectorHitM<M> hit(t_first, Ng_first);
-
- /* invoke intersection filter for first hit */
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_back;
- const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar);
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
- hit = SphereIntersectorHitM<M> (t_second, Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i, Ray& ray, IntersectContext* context, const Points* geom,
- const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- return intersect(valid_i,ray,pre,v0,epilog);
- }
- };
-
- template<int M, int K>
- struct SphereIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- const vfloat<M> td = sqrt((r2 - l2) * rd2);
- const vfloat<M> t_front = projC0 - td;
- const vfloat<M> t_back = projC0 + td;
-
- const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]);
- const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_front | valid_back;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> td_front = -td;
- const vfloat<M> td_back = +td;
- const vfloat<M> t_first = select(valid_front, t_front, t_back);
- const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
- SphereIntersectorHitM<M> hit(t_first, Ng_first);
-
- /* invoke intersection filter for first hit */
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_back;
- const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]);
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
- hit = SphereIntersectorHitM<M> (t_second, Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h
deleted file mode 100644
index 1146847602..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "intersector_epilog.h"
-#include "pointi.h"
-#include "sphere_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct SphereMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query,
- PointQueryContext* context,
- const Primitive& sphere)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct SphereMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time());
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time());
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query,
- PointQueryContext* context,
- const Primitive& sphere)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct SphereMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct SphereMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h
deleted file mode 100644
index 94ad46ad87..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../geometry/primitive.h"
-#include "../subdiv/subdivpatch1base.h"
-
-namespace embree
-{
-
- struct __aligned(64) SubdivPatch1 : public SubdivPatch1Base
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
-
- static Type type;
-
- public:
-
- /*! constructor for cached subdiv patch */
- SubdivPatch1 (const unsigned int gID,
- const unsigned int pID,
- const unsigned int subPatch,
- const SubdivMesh *const mesh,
- const size_t time,
- const Vec2f uv[4],
- const float edge_level[4],
- const int subdiv[4],
- const int simd_width)
- : SubdivPatch1Base(gID,pID,subPatch,mesh,time,uv,edge_level,subdiv,simd_width) {}
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h
deleted file mode 100644
index 74ec1de258..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subdivpatch1.h"
-#include "grid_soa.h"
-#include "grid_soa_intersector1.h"
-#include "grid_soa_intersector_packet.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Thin single-ray precalculation wrapper: inherits everything from Base
-  *  and only forwards the (ray, ptr) constructor arguments to it. */
- template<typename Base>
- class SubdivPatch1Precalculations : public Base
- {
- public:
-   __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr)
-     : Base(ray, ptr)
-   {
-   }
- };
-
- /*! Thin packet (K rays) precalculation wrapper: inherits everything from
-  *  Base and only forwards the (valid, ray) constructor arguments to it. */
- template<int K, typename Base>
- class SubdivPatch1PrecalculationsK : public Base
- {
- public:
-   __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray)
-     : Base(valid, ray)
-   {
-   }
- };
-
- /*! Single-ray intersector for subdivision patches whose leaves are GridSOA
-  *  objects. A leaf of type ty == 0 is handed to GridSOAIntersector1; any
-  *  other type is treated as a lazy node and redirected via processLazyNode. */
- class SubdivPatch1Intersector1
- {
- public:
-   typedef GridSOA Primitive;
-   typedef SubdivPatch1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;
-
-   /*! Remembers the grid in pre.grid and reports its root(0) through
-    *  lazy_node. Always returns false; context is unused. */
-   static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
-   {
-     pre.grid = (Primitive*)prim;
-     lazy_node = prim->root(0);
-     return false;
-   }
-
-   /*! Intersects a ray with the primitive (This and tray are unused). */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) {
-       GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
-       return;
-     }
-     processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
-   }
-
-   /*! Tests whether the ray is occluded by the primitive. For a lazy node
-    *  (ty != 0) the result is that of processLazyNode, i.e. false. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0))
-       return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
-
-     return processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
-   }
-
-   /*! Point queries are not implemented for this primitive: asserts in
-    *  debug builds and returns false otherwise. */
-   template<int N>
-   static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
-   {
-     // TODO: implement PointQuery
-     assert(false && "not implemented");
-     return false;
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. */
-   template<int N>
-   static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
-   {
-     return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
-   }
- };
-
- /*! Single-ray intersector for motion blurred subdivision patches. A leaf
-  *  of type ty == 0 is handed to GridSOAMBIntersector1; any other type is
-  *  treated as a lazy node and redirected via processLazyNode. */
- class SubdivPatch1MBIntersector1
- {
- public:
-   typedef SubdivPatch1 Primitive;
-   typedef GridSOAMBIntersector1::Precalculations Precalculations;
-
-   /*! Resolves the GridSOA referenced by the patch (root_ref), selects the
-    *  time segment for ray.time() (stored in pre.itime / pre.ftime), reports
-    *  the grid root of that segment through lazy_node and remembers the grid
-    *  in pre.grid. Always returns false; context is unused. */
-   static __forceinline bool processLazyNode(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node)
-   {
-     Primitive* prim = (Primitive*) prim_i;
-     GridSOA* grid = (GridSOA*) prim->root_ref.get();
-     pre.itime = getTimeSegment(ray.time(), float(grid->time_steps-1), pre.ftime);
-     lazy_node = grid->root(pre.itime);
-     pre.grid = grid;
-     return false;
-   }
-
-   /*! Intersect a ray with the primitive (This and tray are unused). */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node);
-     else processLazyNode(pre,ray,context,prim,lazy_node);
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
-     intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
-   }
-
-   /*! Test if the ray is occluded by the primitive. For a lazy node
-    *  (ty != 0) the result is that of processLazyNode, i.e. false. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node);
-     else return processLazyNode(pre,ray,context,prim,lazy_node);
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
-     return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
-   }
-
-   /*! Point queries are not implemented for this primitive: asserts in
-    *  debug builds and returns false otherwise. */
-   template<int N>
-   static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
-   {
-     // TODO: implement PointQuery
-     assert(false && "not implemented");
-     return false;
-   }
-
-   /*! Same as above; the extra ty0 argument is ignored. Only N is a
-    *  template parameter: it is the only one that appears in the argument
-    *  list (TravPointQuery<N>) and can therefore be deduced at the call. */
-   template<int N>
-   static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) {
-     return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
-   }
- };
-
- /*! Packet (K rays) intersector for subdivision patches whose leaves are
-  *  GridSOA objects. A leaf of type ty == 0 is handed to
-  *  GridSOAIntersectorK<K>; any other type is treated as a lazy node. */
- template <int K>
- struct SubdivPatch1IntersectorK
- {
-   typedef GridSOA Primitive;
-   typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;
-
-   /*! Remembers the grid in pre.grid and reports its root(0) through
-    *  lazy_node. Always returns false; context is unused. */
-   static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
-   {
-     pre.grid = (Primitive*)prim;
-     lazy_node = prim->root(0);
-     return false;
-   }
-
-   /*! Intersects the valid rays of a packet with the primitive. */
-   template<bool robust>
-   static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) {
-       GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
-       return;
-     }
-     processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Occlusion test for the valid rays of a packet. For a lazy node the
-    *  bool returned by processLazyNode is converted to the vbool<K> result. */
-   template<bool robust>
-   static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0))
-       return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
-
-     return processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Intersects ray k of a packet with the primitive. */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) {
-       GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
-       return;
-     }
-     processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Occlusion test for ray k of a packet. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0))
-       return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
-
-     return processLazyNode(pre,context,prim,lazy_node);
-   }
- };
-
- /*! packet widths instantiated by name */
- typedef SubdivPatch1IntersectorK<4> SubdivPatch1Intersector4;
- typedef SubdivPatch1IntersectorK<8> SubdivPatch1Intersector8;
- typedef SubdivPatch1IntersectorK<16> SubdivPatch1Intersector16;
-
- /*! Packet (K rays) intersector for motion blurred subdivision patches. A
-  *  leaf of type ty == 0 is handed to GridSOAMBIntersectorK<K>; any other
-  *  type is treated as a lazy node. */
- template <int K>
- struct SubdivPatch1MBIntersectorK
- {
-   typedef SubdivPatch1 Primitive;
-   typedef SubdivPatch1PrecalculationsK<K,typename GridSOAMBIntersectorK<K>::Precalculations> Precalculations;
-
-   /*! Resolves the GridSOA referenced by the patch (root_ref), reports its
-    *  troot member through lazy_node and remembers the grid in pre.grid.
-    *  Always returns false; context is unused. */
-   static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node)
-   {
-     Primitive* const patch = (Primitive*) prim_i;
-     GridSOA* const grid = (GridSOA*) patch->root_ref.get();
-     lazy_node = grid->troot;
-     pre.grid = grid;
-     return false;
-   }
-
-   /*! Intersects the valid rays of a packet with the primitive. */
-   template<bool robust>
-   static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) {
-       GridSOAMBIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
-       return;
-     }
-     processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Occlusion test for the valid rays of a packet. For a lazy node the
-    *  bool returned by processLazyNode is converted to the vbool<K> result. */
-   template<bool robust>
-   static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0))
-       return GridSOAMBIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
-
-     return processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Intersects ray k of a packet with the primitive. */
-   template<int N, int Nx, bool robust>
-   static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0)) {
-       GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
-       return;
-     }
-     processLazyNode(pre,context,prim,lazy_node);
-   }
-
-   /*! Occlusion test for ray k of a packet. */
-   template<int N, int Nx, bool robust>
-   static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-   {
-     if (likely(ty == 0))
-       return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
-
-     return processLazyNode(pre,context,prim,lazy_node);
-   }
- };
-
- /*! packet widths instantiated by name */
- typedef SubdivPatch1MBIntersectorK<4> SubdivPatch1MBIntersector4;
- typedef SubdivPatch1MBIntersectorK<8> SubdivPatch1MBIntersector8;
- typedef SubdivPatch1MBIntersectorK<16> SubdivPatch1MBIntersector16;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid.h
deleted file mode 100644
index 39fa6fb0f0..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h
+++ /dev/null
@@ -1,517 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_grid_mesh.h"
-#include "../bvh/bvh.h"
-
-namespace embree
-{
- /* References a 3x3 block of vertices (i.e. up to 2x2 quads) inside one grid
-  * of a GridMesh. The block is addressed by the position (x,y) of its first
-  * vertex within the grid plus the geomID/primID of the grid. Bit 15 of the
-  * stored x (y) marks that the third vertex column (row) does not exist; the
-  * gather functions then reuse the second column (row) instead, so the
-  * affected quads collapse to zero area. */
- struct SubGrid
- {
-   /* Virtual interface to query information about the subgrid type */
-   struct Type : public PrimitiveType
-   {
-     const char* name() const;
-     size_t sizeActive(const char* This) const;
-     size_t sizeTotal(const char* This) const;
-     size_t getBytes(const char* This) const;
-   };
-   static Type type;
-
- public:
-
-   /* primitive supports multiple time segments */
-   static const bool singleTimeSegment = false;
-
-   /* Returns maximum number of primitives stored per block */
-   static __forceinline size_t max_size() { return 1; }
-
-   /* Returns required number of primitive blocks for N primitives */
-   static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
-   /* Default constructor, leaves all members uninitialized */
-   __forceinline SubGrid() { }
-
-   /* Construction from the (flag carrying) grid position and the IDs.
-    * x and y are narrowed to 16 bits when stored. */
-   __forceinline SubGrid(const unsigned int x,
-                         const unsigned int y,
-                         const unsigned int geomID,
-                         const unsigned int primID)
-     : _x(x), _y(y), _geomID(geomID), _primID(primID)
-   {
-   }
-
-   /* True if bit 15 of the stored x (y) is set, i.e. the third vertex
-    * column (row) of the 3x3 block must not be accessed */
-   __forceinline bool invalid3x3X() const { return (unsigned int)_x & (1<<15); }
-   __forceinline bool invalid3x3Y() const { return (unsigned int)_y & (1<<15); }
-
-   /* Gather the four quads of the subgrid, one quad per SIMD lane:
-    * p0..p3 receive the 1st..4th corner of every quad. The quads are stored
-    * in the lane order (00,01,11,10), (01,02,12,11), (11,12,22,21),
-    * (10,11,21,20), where "rc" names the vertex in row r, column c. */
-   __forceinline void gather(Vec3vf4& p0,
-                             Vec3vf4& p1,
-                             Vec3vf4& p2,
-                             Vec3vf4& p3,
-                             const GridMesh* const mesh,
-                             const GridMesh::Grid &g) const
-   {
-     size_t id[3][3];
-     vertexIDs(g,id);
-
-     const vfloat4 vtx00 = vfloat4::loadu(mesh->vertexPtr(id[0][0]));
-     const vfloat4 vtx01 = vfloat4::loadu(mesh->vertexPtr(id[0][1]));
-     const vfloat4 vtx02 = vfloat4::loadu(mesh->vertexPtr(id[0][2]));
-     const vfloat4 vtx10 = vfloat4::loadu(mesh->vertexPtr(id[1][0]));
-     const vfloat4 vtx11 = vfloat4::loadu(mesh->vertexPtr(id[1][1]));
-     const vfloat4 vtx12 = vfloat4::loadu(mesh->vertexPtr(id[1][2]));
-     const vfloat4 vtx20 = vfloat4::loadu(mesh->vertexPtr(id[2][0]));
-     const vfloat4 vtx21 = vfloat4::loadu(mesh->vertexPtr(id[2][1]));
-     const vfloat4 vtx22 = vfloat4::loadu(mesh->vertexPtr(id[2][2]));
-
-     transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
-     transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
-     transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
-     transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
-   }
-
-   /* Loads vertex "offset" at time steps itime and itime+1 as type T and
-    * linearly interpolates between them with weight ftime.
-    * NOTE(review): the return type is vfloat4 regardless of T; all callers
-    * in this struct use T = vfloat4. */
-   template<typename T>
-   __forceinline vfloat4 getVertexMB(const GridMesh* const mesh, const size_t offset, const size_t itime, const float ftime) const
-   {
-     const T v0 = T::loadu(mesh->vertexPtr(offset,itime+0));
-     const T v1 = T::loadu(mesh->vertexPtr(offset,itime+1));
-     return lerp(v0,v1,ftime);
-   }
-
-   /* Gather the four quads for the motion blur case: same layout as
-    * gather() above, but every vertex is interpolated by getVertexMB */
-   __forceinline void gatherMB(Vec3vf4& p0,
-                               Vec3vf4& p1,
-                               Vec3vf4& p2,
-                               Vec3vf4& p3,
-                               const GridMesh* const mesh,
-                               const GridMesh::Grid &g,
-                               const size_t itime,
-                               const float ftime) const
-   {
-     size_t id[3][3];
-     vertexIDs(g,id);
-
-     const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,id[0][0],itime,ftime);
-     const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,id[0][1],itime,ftime);
-     const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,id[0][2],itime,ftime);
-     const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,id[1][0],itime,ftime);
-     const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,id[1][1],itime,ftime);
-     const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,id[1][2],itime,ftime);
-     const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,id[2][0],itime,ftime);
-     const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,id[2][1],itime,ftime);
-     const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,id[2][2],itime,ftime);
-
-     transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
-     transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
-     transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
-     transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
-   }
-
-   /* Gather the quads, looking up mesh and grid through the scene */
-   __forceinline void gather(Vec3vf4& p0,
-                             Vec3vf4& p1,
-                             Vec3vf4& p2,
-                             Vec3vf4& p3,
-                             const Scene *const scene) const
-   {
-     const GridMesh* const mesh = scene->get<GridMesh>(geomID());
-     const GridMesh::Grid &g = mesh->grid(primID());
-     gather(p0,p1,p2,p3,mesh,g);
-   }
-
-   /* Gather the quads in the motion blur case, looking up mesh and grid
-    * through the scene */
-   __forceinline void gatherMB(Vec3vf4& p0,
-                               Vec3vf4& p1,
-                               Vec3vf4& p2,
-                               Vec3vf4& p3,
-                               const Scene *const scene,
-                               const size_t itime,
-                               const float ftime) const
-   {
-     const GridMesh* const mesh = scene->get<GridMesh>(geomID());
-     const GridMesh::Grid &g = mesh->grid(primID());
-     gatherMB(p0,p1,p2,p3,mesh,g,itime,ftime);
-   }
-
-   /* Gather the quads as 16 individual vertices, four consecutive entries
-    * per quad.
-    * NOTE(review): the quad order here is (00,01,11,10), (01,02,12,11),
-    * (10,11,21,20), (11,12,22,21), i.e. the last two quads are swapped
-    * compared to the Vec3vf4 variants above - confirm callers expect this. */
-   __forceinline void gather(Vec3fa vtx[16], const Scene *const scene) const
-   {
-     const GridMesh* mesh = scene->get<GridMesh>(geomID());
-     const GridMesh::Grid &g = mesh->grid(primID());
-
-     size_t id[3][3];
-     vertexIDs(g,id);
-
-     const Vec3fa vtx00 = Vec3fa::loadu(mesh->vertexPtr(id[0][0]));
-     const Vec3fa vtx01 = Vec3fa::loadu(mesh->vertexPtr(id[0][1]));
-     const Vec3fa vtx02 = Vec3fa::loadu(mesh->vertexPtr(id[0][2]));
-     const Vec3fa vtx10 = Vec3fa::loadu(mesh->vertexPtr(id[1][0]));
-     const Vec3fa vtx11 = Vec3fa::loadu(mesh->vertexPtr(id[1][1]));
-     const Vec3fa vtx12 = Vec3fa::loadu(mesh->vertexPtr(id[1][2]));
-     const Vec3fa vtx20 = Vec3fa::loadu(mesh->vertexPtr(id[2][0]));
-     const Vec3fa vtx21 = Vec3fa::loadu(mesh->vertexPtr(id[2][1]));
-     const Vec3fa vtx22 = Vec3fa::loadu(mesh->vertexPtr(id[2][2]));
-
-     vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
-     vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
-     vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
-     vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
-   }
-
-   /* Gather the quads as 16 individual vertices in the motion blur case;
-    * same entry order as the Vec3fa variant above */
-   __forceinline void gatherMB(vfloat4 vtx[16], const Scene *const scene, const size_t itime, const float ftime) const
-   {
-     const GridMesh* mesh = scene->get<GridMesh>(geomID());
-     const GridMesh::Grid &g = mesh->grid(primID());
-
-     size_t id[3][3];
-     vertexIDs(g,id);
-
-     const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,id[0][0],itime,ftime);
-     const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,id[0][1],itime,ftime);
-     const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,id[0][2],itime,ftime);
-     const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,id[1][0],itime,ftime);
-     const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,id[1][1],itime,ftime);
-     const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,id[1][2],itime,ftime);
-     const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,id[2][0],itime,ftime);
-     const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,id[2][1],itime,ftime);
-     const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,id[2][2],itime,ftime);
-
-     vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
-     vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
-     vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
-     vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
-   }
-
-
-   /* Bounds of the subgrid: not implemented, raises FATAL */
-   __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
-   {
-     BBox3fa bounds = empty;
-     FATAL("not implemented yet");
-     return bounds;
-   }
-
-   /* Linear bounds built from bounds() at time steps itime and itime+1
-    * (and therefore raising FATAL as well) */
-   __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime)
-   {
-     return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
-   }
-
-   /* Linear bounds over numTimeSteps: not implemented, raises FATAL */
-   __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
-   {
-     LBBox3fa allBounds = empty;
-     FATAL("not implemented yet");
-     return allBounds;
-   }
-
-   /* Linear bounds over a time range: not implemented, raises FATAL */
-   __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
-   {
-     LBBox3fa allBounds = empty;
-     FATAL("not implemented yet");
-     return allBounds;
-   }
-
-
-   /* Debug output; every field is printed as "name = value" */
-   friend embree_ostream operator<<(embree_ostream cout, const SubGrid& sg) {
-     return cout << "SubGrid ( x = " << sg.x() << ", y = " << sg.y() << ", geomID = " << sg.geomID() << ", primID = " << sg.primID() << " )";
-   }
-
-   __forceinline unsigned int geomID() const { return _geomID; }
-   __forceinline unsigned int primID() const { return _primID; }
-
-   /* grid position with the flag bit (bit 15) masked off */
-   __forceinline unsigned int x() const { return (unsigned int)_x & 0x7fff; }
-   __forceinline unsigned int y() const { return (unsigned int)_y & 0x7fff; }
-
- private:
-
-   /* Computes the vertex indices of the 3x3 block, id[row][column]. The
-    * first 2x2 vertices are always valid. If the third column (row) is
-    * flagged invalid its indices equal those of the second column (row),
-    * so no vertex outside the grid is ever addressed. */
-   __forceinline void vertexIDs(const GridMesh::Grid &g, size_t (&id)[3][3]) const
-   {
-     const size_t deltaX = invalid3x3X() ? 0 : 1;
-     const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
-
-     id[0][0] = g.startVtxID + x() + y() * g.lineVtxOffset;
-     id[0][1] = id[0][0] + 1;
-     id[0][2] = id[0][1] + deltaX;
-
-     id[1][0] = id[0][0] + g.lineVtxOffset;
-     id[1][1] = id[0][1] + g.lineVtxOffset;
-     id[1][2] = id[1][1] + deltaX;
-
-     id[2][0] = id[1][0] + deltaY;
-     id[2][1] = id[1][1] + deltaY;
-     id[2][2] = id[1][1] + deltaX + deltaY;
-   }
-
-   unsigned short _x;    // x position in grid, bit 15 = third column invalid
-   unsigned short _y;    // y position in grid, bit 15 = third row invalid
-   unsigned int _geomID; // geometry ID of mesh
-   unsigned int _primID; // primitive ID of primitive inside mesh
- };
-
- /* Compact reference to one subgrid: 16-bit x/y grid position plus the
-  * primitive ID of the grid it belongs to. */
- struct SubGridID
- {
-   unsigned short x;
-   unsigned short y;
-   unsigned int primID;
-
-   /* leaves all members uninitialized */
-   __forceinline SubGridID() {}
-
-   /* x and y are narrowed to 16 bits */
-   __forceinline SubGridID(const unsigned int x, const unsigned int y, const unsigned int primID)
-     : x((unsigned short)x)
-     , y((unsigned short)y)
-     , primID(primID)
-   {
-   }
- };
-
- /* Leaf holding up to N subgrids of one geometry together with a
-  * QuantizedBaseNode that stores one bounding box per subgrid. Unused slots
-  * are marked by primID == (unsigned int)-1. */
- template<int N>
- struct SubGridQBVHN
- {
-   /* Virtual interface to query information about the subgrid leaf type */
-   struct Type : public PrimitiveType
-   {
-     const char* name() const;
-     size_t sizeActive(const char* This) const;
-     size_t sizeTotal(const char* This) const;
-     size_t getBytes(const char* This) const;
-   };
-   static Type type;
-
- public:
-
-   /* Number of used slots: index of the first slot carrying the invalid
-    * primID marker, or N if every slot is in use */
-   __forceinline size_t size() const
-   {
-     for (size_t i=0;i<N;i++)
-       if (primID(i) == (unsigned int)-1) return i;
-     return N;
-   }
-
-   /* Marks every slot as unused and clears the quantized node */
-   __forceinline void clear() {
-     for (size_t i=0;i<N;i++)
-       subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
-     qnode.clear();
-   }
-
-   /* Default constructor, leaves all members uninitialized */
-   __forceinline SubGridQBVHN() { }
-
-   /* Construction from the first "items" entries of the position, ID and
-    * bounds arrays; items must not exceed N. The bounds are collected in a
-    * temporary AABBNode which is then passed to qnode.init_dim(). */
-   __forceinline SubGridQBVHN(const unsigned int x[N],
-                              const unsigned int y[N],
-                              const unsigned int primID[N],
-                              const BBox3fa * const subGridBounds,
-                              const unsigned int geomID,
-                              const unsigned int items)
-   {
-     assert(items <= (unsigned int)N); // subgridIDs and node only have N slots
-     clear();
-     _geomID = geomID;
-
-     __aligned(64) typename BVHN<N>::AABBNode node;
-     node.clear();
-     for (size_t i=0;i<items;i++)
-     {
-       subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
-       node.setBounds(i,subGridBounds[i]);
-     }
-     qnode.init_dim(node);
-   }
-
-   __forceinline unsigned int geomID() const { return _geomID; }
-   __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
-   __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
-   __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }
-
-   /* Returns slot i as a SubGrid; the slot must be in use */
-   __forceinline SubGrid subgrid(const size_t i) const {
-     assert(i < N);
-     assert(primID(i) != (unsigned int)-1);
-     return SubGrid(x(i),y(i),geomID(),primID(i));
-   }
-
- public:
-   SubGridID subgridIDs[N];
-
-   typename BVHN<N>::QuantizedBaseNode qnode;
-
-   unsigned int _geomID; // geometry ID of mesh
-
-
-   /* Debug output of all N slots and the dequantized bounds */
-   friend embree_ostream operator<<(embree_ostream cout, const SubGridQBVHN& sg) {
-     cout << "SubGridQBVHN " << embree_endl;
-     for (size_t i=0;i<N;i++)
-       cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
-     cout << "geomID " << sg._geomID << embree_endl;
-     cout << "lowerX " << sg.qnode.dequantizeLowerX() << embree_endl;
-     cout << "upperX " << sg.qnode.dequantizeUpperX() << embree_endl;
-     cout << "lowerY " << sg.qnode.dequantizeLowerY() << embree_endl;
-     cout << "upperY " << sg.qnode.dequantizeUpperY() << embree_endl;
-     cout << "lowerZ " << sg.qnode.dequantizeLowerZ() << embree_endl;
-     cout << "upperZ " << sg.qnode.dequantizeUpperZ() << embree_endl;
-     return cout;
-   }
-
- };
-
- /* definition of the static type descriptor of every instantiation */
- template<int N>
- typename SubGridQBVHN<N>::Type SubGridQBVHN<N>::type;
-
- typedef SubGridQBVHN<4> SubGridQBVH4;
- typedef SubGridQBVHN<8> SubGridQBVH8;
-
-
- /* Motion blur variant of the subgrid leaf: holds up to N subgrids of one
-  * geometry together with a QuantizedBaseNodeMB whose node0/node1 store one
-  * bounding box per subgrid each, plus a time offset and scale. Unused
-  * slots are marked by primID == (unsigned int)-1. */
- template<int N>
- struct SubGridMBQBVHN
- {
-   /* Virtual interface to query information about the subgrid leaf type */
-   struct Type : public PrimitiveType
-   {
-     const char* name() const;
-     size_t sizeActive(const char* This) const;
-     size_t sizeTotal(const char* This) const;
-     size_t getBytes(const char* This) const;
-   };
-   static Type type;
-
- public:
-
-   /* Number of used slots: index of the first slot carrying the invalid
-    * primID marker, or N if every slot is in use */
-   __forceinline size_t size() const
-   {
-     for (size_t i=0;i<N;i++)
-       if (primID(i) == (unsigned int)-1) return i;
-     return N;
-   }
-
-   /* Marks every slot as unused and clears the quantized node */
-   __forceinline void clear() {
-     for (size_t i=0;i<N;i++)
-       subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
-     qnode.clear();
-   }
-
-   /* Default constructor, leaves all members uninitialized */
-   __forceinline SubGridMBQBVHN() { }
-
-   /* Construction from the first "items" entries of the position, ID and
-    * bounds arrays; items must not exceed N. subGridBounds0 fills
-    * qnode.node0 and subGridBounds1 fills qnode.node1, each through a
-    * temporary AABBNode passed to init_dim(). */
-   __forceinline SubGridMBQBVHN(const unsigned int x[N],
-                                const unsigned int y[N],
-                                const unsigned int primID[N],
-                                const BBox3fa * const subGridBounds0,
-                                const BBox3fa * const subGridBounds1,
-                                const unsigned int geomID,
-                                const float toffset,
-                                const float tscale,
-                                const unsigned int items)
-   {
-     assert(items <= (unsigned int)N); // subgridIDs and nodes only have N slots
-     clear();
-     _geomID = geomID;
-     time_offset = toffset;
-     time_scale = tscale;
-
-     __aligned(64) typename BVHN<N>::AABBNode node0,node1;
-     node0.clear();
-     node1.clear();
-     for (size_t i=0;i<items;i++)
-     {
-       subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
-       node0.setBounds(i,subGridBounds0[i]);
-       node1.setBounds(i,subGridBounds1[i]);
-     }
-     qnode.node0.init_dim(node0);
-     qnode.node1.init_dim(node1);
-   }
-
-   __forceinline unsigned int geomID() const { return _geomID; }
-   __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
-   __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
-   __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }
-
-   /* Returns slot i as a SubGrid; the slot must be in use */
-   __forceinline SubGrid subgrid(const size_t i) const {
-     assert(i < N);
-     assert(primID(i) != (unsigned int)-1);
-     return SubGrid(x(i),y(i),geomID(),primID(i));
-   }
-
-   /* Maps a time t to time_scale * (t - time_offset) */
-   __forceinline float adjustTime(const float t) const { return time_scale * (t-time_offset); }
-
-   /* Packet version of adjustTime */
-   template<int K>
-   __forceinline vfloat<K> adjustTime(const vfloat<K> &t) const { return time_scale * (t-time_offset); }
-
- public:
-   SubGridID subgridIDs[N];
-
-   typename BVHN<N>::QuantizedBaseNodeMB qnode;
-
-   float time_offset;
-   float time_scale;
-   unsigned int _geomID; // geometry ID of mesh
-
-
-   /* Debug output of all N slots, the time mapping and the dequantized
-    * bounds of node0 followed by those of node1 */
-   friend embree_ostream operator<<(embree_ostream cout, const SubGridMBQBVHN& sg) {
-     cout << "SubGridMBQBVHN " << embree_endl;
-     for (size_t i=0;i<N;i++)
-       cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
-     cout << "geomID " << sg._geomID << embree_endl;
-     cout << "time_offset " << sg.time_offset << embree_endl;
-     cout << "time_scale " << sg.time_scale << embree_endl;
-     cout << "lowerX " << sg.qnode.node0.dequantizeLowerX() << embree_endl;
-     cout << "upperX " << sg.qnode.node0.dequantizeUpperX() << embree_endl;
-     cout << "lowerY " << sg.qnode.node0.dequantizeLowerY() << embree_endl;
-     cout << "upperY " << sg.qnode.node0.dequantizeUpperY() << embree_endl;
-     cout << "lowerZ " << sg.qnode.node0.dequantizeLowerZ() << embree_endl;
-     cout << "upperZ " << sg.qnode.node0.dequantizeUpperZ() << embree_endl;
-     cout << "lowerX " << sg.qnode.node1.dequantizeLowerX() << embree_endl;
-     cout << "upperX " << sg.qnode.node1.dequantizeUpperX() << embree_endl;
-     cout << "lowerY " << sg.qnode.node1.dequantizeLowerY() << embree_endl;
-     cout << "upperY " << sg.qnode.node1.dequantizeUpperY() << embree_endl;
-     cout << "lowerZ " << sg.qnode.node1.dequantizeLowerZ() << embree_endl;
-     cout << "upperZ " << sg.qnode.node1.dequantizeUpperZ() << embree_endl;
-     return cout;
-   }
-
- };
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h
deleted file mode 100644
index 045eee4329..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h
+++ /dev/null
@@ -1,518 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "subgrid_intersector_moeller.h"
-#include "subgrid_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
-
- // =======================================================================================
- // =================================== SubGridIntersectors ===============================
- // =======================================================================================
-
-
- template<int N, bool filter>
- struct SubGridIntersector1Moeller
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1MoellerTrumbore<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
- assert(accel);
- context->geomID = subgrid.geomID();
- context->primID = subgrid.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- size_t mask;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- } else {
- mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- }
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
- changed |= pointQuery(query, context, prim[i].subgrid(ID));
- }
- }
- return changed;
- }
- };
-
- template<int N, bool filter>
- struct SubGridIntersector1Pluecker
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
- context->geomID = subgrid.geomID();
- context->primID = subgrid.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- size_t mask;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- } else {
- mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- }
-#if defined(__AVX__)
- STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
- changed |= pointQuery(query, context, prim[i].subgrid(ID));
- }
- }
- return changed;
- }
- };
-
- template<int N, int K, bool filter>
- struct SubGridIntersectorKMoeller
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
-
-
- template<int N, int K, bool filter>
- struct SubGridIntersectorKPluecker
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
-
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h
deleted file mode 100644
index f65b4abf61..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "quad_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
-
- /* ----------------------------- */
- /* -- single ray intersectors -- */
- /* ----------------------------- */
-
- template<int M>
- __forceinline void interpolateUV(MoellerTrumboreHitM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid)
- {
- /* correct U,V interpolation across the entire grid */
- const vint<M> sx((int)subgrid.x());
- const vint<M> sy((int)subgrid.y());
- const vint<M> sxM(sx + vint<M>(0,1,1,0));
- const vint<M> syM(sy + vint<M>(0,0,1,1));
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.U = (hit.U + (vfloat<M>)sxM * hit.absDen) * inv_resX;
- hit.V = (hit.V + (vfloat<M>)syM * hit.absDen) * inv_resY;
- }
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore;
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore
- {
- __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}
-
- __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- {
- interpolateUV<M>(hit,g,subgrid);
- epilog(hit.valid,hit);
- }
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- interpolateUV<M>(hit,g,subgrid);
- epilog(hit.valid,hit);
- }
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- {
- interpolateUV<M>(hit,g,subgrid);
- if (epilog(hit.valid,hit))
- return true;
- }
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- interpolateUV<M>(hit,g,subgrid);
- if (epilog(hit.valid,hit))
- return true;
- }
- return false;
- }
- };
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}
-
- __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- MoellerTrumboreHitM<8> hit;
- MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- /* correct U,V interpolation across the entire grid */
- const vint8 sx((int)subgrid.x());
- const vint8 sy((int)subgrid.y());
- const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0));
- const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1));
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX;
- hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY;
-
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
- // ============================================================================================================================
- // ============================================================================================================================
- // ============================================================================================================================
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- template<int K>
- struct SubGridQuadHitK
- {
- __forceinline SubGridQuadHitK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& T,
- const vfloat<K>& absDen,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid& subgrid,
- const unsigned int i)
- : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u0 = min(U * rcpAbsDen,1.0f);
- const vfloat<K> v0 = min(V * rcpAbsDen,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> uu = select(flags,u1,u0);
- const vfloat<K> vv = select(flags,v1,v0);
- const unsigned int sx = subgrid.x() + (unsigned int)(i % 2);
- const unsigned int sy = subgrid.y() + (unsigned int)(i >>1);
- const float inv_resX = rcp((float)(int)(g.resX-1));
- const float inv_resY = rcp((float)(int)(g.resY-1));
- const vfloat<K> u = (uu + (float)(int)sx) * inv_resX;
- const vfloat<K> v = (vv + (float)(int)sy) * inv_resY;
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
-
- const GridMesh::Grid &g;
- const SubGrid& subgrid;
- const size_t i;
- };
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumboreBase
- {
- __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray.org;
- const Vec3vf<K> R = cross(C,ray.dir);
- const vfloat<K> den = dot(tri_Ng,ray.dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- SubGridQuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags,g,subgrid,i);
- return epilog(valid,hit);
- }
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,g,subgrid,i,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog);
- return none(valid0);
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- MoellerTrumboreHitM<M> &hit)
- {
- /* calculate denominator */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M> &hit)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1);
- return intersect1(ray,k,v0,e1,e2,Ng,hit);
- }
-
- };
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>
- {
- __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
- MoellerTrumboreHitM<4> hit;
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit))
- {
- interpolateUV<M>(hit,g,subgrid);
- epilog(hit.valid,hit);
- }
-
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- interpolateUV<M>(hit,g,subgrid);
- epilog(hit.valid,hit);
- }
-
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
- MoellerTrumboreHitM<4> hit;
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit))
- {
- interpolateUV<M>(hit,g,subgrid);
- if (epilog(hit.valid,hit)) return true;
- }
-
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- interpolateUV<M>(hit,g,subgrid);
- if (epilog(hit.valid,hit)) return true;
- }
- return false;
- }
- };
-
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
-
- MoellerTrumboreHitM<8> hit;
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
-
- /* correct U,V interpolation across the entire grid */
- const vint8 sx((int)subgrid.x());
- const vint8 sy((int)subgrid.y());
- const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0));
- const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1));
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX;
- hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY;
- if (unlikely(epilog(hit.valid,hit)))
- return true;
-
- }
- return false;
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h
deleted file mode 100644
index 1cd88aa799..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h
+++ /dev/null
@@ -1,508 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
-
- template<int M>
- struct SubGridQuadHitPlueckerM
- {
- __forceinline SubGridQuadHitPlueckerM() {}
-
- __forceinline SubGridQuadHitPlueckerM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& UVW,
- const vfloat<M>& t,
- const Vec3vf<M>& Ng,
- const vbool<M>& flags) : valid(valid), vt(t)
- {
- const vbool<M> invalid = abs(UVW) < min_rcp_input;
- const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
- const vfloat<M> u = min(U * rcpUVW,1.0f);
- const vfloat<M> v = min(V * rcpUVW,1.0f);
- const vfloat<M> u1 = vfloat<M>(1.0f) - u;
- const vfloat<M> v1 = vfloat<M>(1.0f) - v;
-#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
- vu = select(flags,u1,u);
- vv = select(flags,v1,v);
- vNg = Vec3vf<M>(Ng.x,Ng.y,Ng.z);
-#else
- const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
- vv = select(flags,u1,v);
- vu = select(flags,v1,u);
- vNg = Vec3vf<M>(flip*Ng.x,flip*Ng.y,flip*Ng.z);
-#endif
- }
-
- __forceinline void finalize()
- {
- }
-
- __forceinline Vec2f uv(const size_t i)
- {
- const float u = vu[i];
- const float v = vv[i];
- return Vec2f(u,v);
- }
-
- __forceinline float t(const size_t i) { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- __forceinline void interpolateUV(SubGridQuadHitPlueckerM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY)
- {
- /* correct U,V interpolation across the entire grid */
- const vint<M> sx((int)subgrid.x());
- const vint<M> sy((int)subgrid.y());
- const vint<M> sxM(sx + stepX);
- const vint<M> syM(sy + stepY);
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.vu = (hit.vu + vfloat<M>(sxM)) * inv_resX;
- hit.vv = (hit.vv + vfloat<M>(syM)) * inv_resY;
- }
-
- template<int M>
- __forceinline static bool intersectPluecker(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const vbool<M>& flags,
- SubGridQuadHitPlueckerM<M> &hit)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,Ng,flags);
- return true;
- }
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1Pluecker;
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1Pluecker
- {
- __forceinline SubGridQuadMIntersector1Pluecker() {}
-
- __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- SubGridQuadHitPlueckerM<M> hit;
- Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());
-
- /* intersect first triangle */
- if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- epilog(hit.valid,hit);
- }
-
- /* intersect second triangle */
- if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- epilog(hit.valid,hit);
- }
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- SubGridQuadHitPlueckerM<M> hit;
- Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());
-
- /* intersect first triangle */
- if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- if (epilog(hit.valid,hit))
- return true;
- }
-
- /* intersect second triangle */
- if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- if (epilog(hit.valid,hit))
- return true;
- }
-
- return false;
- }
- };
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct SubGridQuadMIntersector1Pluecker<4,filter>
- {
- __forceinline SubGridQuadMIntersector1Pluecker() {}
-
- __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- SubGridQuadHitPlueckerM<8> hit;
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersectPluecker(ray,vtx0,vtx1,vtx2,flags,hit)))
- {
- /* correct U,V interpolation across the entire grid */
- interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- template<int K>
- struct SubGridQuadHitPlueckerK
- {
- __forceinline SubGridQuadHitPlueckerK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& UVW,
- const vfloat<K>& t,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid& subgrid,
- const unsigned int i)
- : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vbool<K> invalid = abs(UVW) < min_rcp_input;
- const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
- const vfloat<K> u0 = min(U * rcpUVW,1.0f);
- const vfloat<K> v0 = min(V * rcpUVW,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> uu = select(flags,u1,u0);
- const vfloat<K> vv = select(flags,v1,v0);
- const unsigned int sx = subgrid.x() + (unsigned int)(i % 2);
- const unsigned int sy = subgrid.y() + (unsigned int)(i >>1);
- const float inv_resX = rcp((float)(int)(g.resX-1));
- const float inv_resY = rcp((float)(int)(g.resY-1));
- const vfloat<K> u = (uu + (float)(int)sx) * inv_resX;
- const vfloat<K> v = (vv + (float)(int)sy) * inv_resY;
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> UVW;
- const vfloat<K> t;
- const vfloat<K> absDen;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
-
- const GridMesh::Grid &g;
- const SubGrid& subgrid;
- const size_t i;
- };
-
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKPlueckerBase
- {
- __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- SubGridQuadHitPlueckerK<K> hit(U,V,UVW,t,tri_Ng,flags,g,subgrid,i);
- return epilog(valid,hit);
- }
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v2-v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,v0,v1,v2,Ng,flags,g,subgrid,i,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog);
- return none(valid0);
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const Vec3vf<M>& tri_Ng,
- const vbool<M>& flags,
- SubGridQuadHitPlueckerM<M> &hit)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps ;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,tri_Ng,flags);
- return true;
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const vbool<M>& flags,
- SubGridQuadHitPlueckerM<M> &hit)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1); // FIXME: optimize!!!
- return intersect1(ray,k,v0,v1,v2,Ng,flags,hit);
- }
-
- };
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter>
- {
- __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
- SubGridQuadHitPlueckerM<4> hit;
- if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- epilog(hit.valid,hit);
- }
-
- if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- epilog(hit.valid,hit);
- }
-
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
- SubGridQuadHitPlueckerM<4> hit;
- if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- if (epilog(hit.valid,hit)) return true;
- }
-
- if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit))
- {
- interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
- if (epilog(hit.valid,hit)) return true;
- }
- return false;
- }
- };
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h
deleted file mode 100644
index 400a88b985..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h
+++ /dev/null
@@ -1,236 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, bool filter>
- struct SubGridMBIntersector1Pluecker
- {
- typedef SubGridMBQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- float ftime;
- const int itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- float ftime;
- const int itime = mesh->timeSegment(ray.time(), ftime);
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- const float time = prim[i].adjustTime(ray.time());
-
- assert(time <= 1.0f);
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- const float time = prim[i].adjustTime(ray.time());
- assert(time <= 1.0f);
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- assert(false && "not implemented");
- return false;
- }
- };
-
-
- template<int N, int K, bool filter>
- struct SubGridMBIntersectorKPluecker
- {
- typedef SubGridMBQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- size_t m_valid = movemask(valid_i);
- while(m_valid)
- {
- size_t ID = bscf(m_valid);
- intersect(pre,ray,ID,context,subgrid);
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- size_t m_valid = movemask(valid_i);
- while(m_valid)
- {
- size_t ID = bscf(m_valid);
- if (occluded(pre,ray,ID,context,subgrid))
- clear(valid0,ID);
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- const vfloat<K> time = prim[j].adjustTime(ray.time());
-
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
-
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- const vfloat<K> time = prim[j].adjustTime(ray.time());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- const float time = prim[i].adjustTime(ray.time()[k]);
- assert(time <= 1.0f);
-
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- const float time = prim[i].adjustTime(ray.time()[k]);
- assert(time <= 1.0f);
-
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle.h b/thirdparty/embree-aarch64/kernels/geometry/triangle.h
deleted file mode 100644
index 0dedf6dc4c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle.h
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Precalculated representation for M triangles. Stores for each
- triangle a base vertex, two edges, and the geometry normal to
- speed up intersection calculations */
- template<int M>
- struct TriangleM
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleM() {}
-
- /* Construction from vertices and IDs */
- __forceinline TriangleM(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(v0), e1(v0-v1), e2(v2-v0), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns true if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangle */
- __forceinline BBox3fa bounds() const
- {
- Vec3vf<M> p0 = v0;
- Vec3vf<M> p1 = v0-e1;
- Vec3vf<M> p2 = v0+e2;
- Vec3vf<M> lower = min(p0,p1,p2);
- Vec3vf<M> upper = max(p0,p1,p2);
- vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Non temporal store */
- __forceinline static void store_nt(TriangleM* dst, const TriangleM& src)
- {
- vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
- vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
- vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
- vfloat<M>::store_nt(&dst->e1.x,src.e1.x);
- vfloat<M>::store_nt(&dst->e1.y,src.e1.y);
- vfloat<M>::store_nt(&dst->e1.z,src.e1.z);
- vfloat<M>::store_nt(&dst->e2.x,src.e2.x);
- vfloat<M>::store_nt(&dst->e2.y,src.e2.y);
- vfloat<M>::store_nt(&dst->e2.z,src.e2.z);
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID));
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (unlikely(geomID(i) == -1)) break;
- const unsigned geomId = geomID(i);
- const unsigned primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- vgeomID [i] = geomId;
- vprimID [i] = primId;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID));
- return bounds;
- }
-
- public:
- Vec3vf<M> v0; // base vertex of the triangles
- Vec3vf<M> e1; // 1st edge of the triangles (v0-v1)
- Vec3vf<M> e2; // 2nd edge of the triangles (v2-v0)
- private:
- vuint<M> geomIDs; // geometry IDs
- vuint<M> primIDs; // primitive IDs
- };
-
- template<int M>
- typename TriangleM<M>::Type TriangleM<M>::type;
-
- typedef TriangleM<4> Triangle4;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h
deleted file mode 100644
index 125a42c5fe..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "triangle_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMIntersector1Moeller
- {
- typedef TriangleM<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
-
- };
-
- /*! Intersects M triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMIntersectorKMoeller
- {
- typedef TriangleM<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT_USER(0,TriangleM<M>::max_size());
- for (size_t i=0; i<TriangleM<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i);
- const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i);
- pre.intersectEdgeK(valid_i,ray,p0,e1,e2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<TriangleM<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i);
- const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i);
- pre.intersectEdgeK(valid0,ray,p0,e1,e2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h
deleted file mode 100644
index b5a8519236..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h
+++ /dev/null
@@ -1,403 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-/*! This intersector implements a modified version of the Moeller
- * Trumbore intersector from the paper "Fast, Minimum Storage
- * Ray-Triangle Intersection". In contrast to the paper we
- * precalculate some factors and factor the calculations differently
- * to allow precalculating the cross product e1 x e2. The resulting
- * algorithm is similar to the fastest one of the paper "Optimizing
- * Ray-Triangle Intersection via Automated Search". */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct MoellerTrumboreHitM
- {
- __forceinline MoellerTrumboreHitM() {}
-
- __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng)
- : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- const vfloat<M> rcpAbsDen = rcp(absDen);
- vt = T * rcpAbsDen;
- vu = U * rcpAbsDen;
- vv = V * rcpAbsDen;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- vfloat<M> absDen;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct MoellerTrumboreIntersector1
- {
- __forceinline MoellerTrumboreIntersector1() {}
-
- __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {}
-
- __forceinline bool intersect(const vbool<M>& valid0,
- Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- MoellerTrumboreHitM<M>& hit) const
- {
- /* calculate denominator */
- vbool<M> valid = valid0;
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
-
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar));
- if (likely(none(valid))) return false;
-
-
- /* update hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
-
- return true;
- }
-
- __forceinline bool intersectEdge(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- vbool<M> valid = true;
- const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1);
- return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit);
- }
-
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M>& hit) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,v0,e1,e2,hit);
- }
-
- __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M>& hit) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(valid,ray,v0,e1,e2,hit);
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& e1,
- const Vec3vf<M>& e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
- };
-
- template<int K>
- struct MoellerTrumboreHitK
- {
- __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
- : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u = U * rcpAbsDen;
- const vfloat<K> v = V * rcpAbsDen;
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const Vec3vf<K> Ng;
- };
-
- template<int M, int K>
- struct MoellerTrumboreIntersectorK
- {
- __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- //RayK<K>& ray,
- const Vec3vf<K>& ray_org,
- const Vec3vf<K>& ray_dir,
- const vfloat<K>& ray_tnear,
- const vfloat<K>& ray_tfar,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray_org;
- const Vec3vf<K> R = cross(C,ray_dir);
- const vfloat<K> den = dot(tri_Ng,ray_dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- MoellerTrumboreHitK<K> hit(U,V,T,absDen,tri_Ng);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- /* calculate denominator */
- typedef Vec3vf<M> Vec3vfM;
- const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
-
- const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vfM C = Vec3vfM(tri_v0) - O;
- const Vec3vfM R = cross(C,D);
- const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
- const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
- {
- hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
- hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
- return any(hit.valid);
- }
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,v0,e1,e2,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h
deleted file mode 100644
index f1de99d208..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "trianglev.h"
-#include "trianglev_mb.h"
-#include "intersector_epilog.h"
-
-/*! Modified Pluecker ray/triangle intersector. The test first shifts
- * the ray origin into the origin of the coordinate system and then
- * uses Pluecker coordinates for the intersection. Due to the shift,
- * the Pluecker coordinate calculation simplifies and the tests get
- * numerically stable. The edge equations are watertight along the
- * edge for neighboring triangles. */
-
-namespace embree
-{
- namespace isa
- {
- template<int M, typename UVMapper>
- struct PlueckerHitM
- {
- __forceinline PlueckerHitM(const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV)
- : U(U), V(V), UVW(UVW), mapUV(mapUV), vt(t), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- const vbool<M> invalid = abs(UVW) < min_rcp_input;
- const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
- vu = U * rcpUVW;
- vv = V * rcpUVW;
- mapUV(vu,vv);
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- const vfloat<M> U;
- const vfloat<M> V;
- const vfloat<M> UVW;
- const UVMapper& mapUV;
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct PlueckerIntersector1
- {
- __forceinline PlueckerIntersector1() {}
-
- __forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {}
-
- template<typename UVMapper, typename Epilog>
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
- };
-
- template<int K, typename UVMapper>
- struct PlueckerHitK
- {
- __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV)
- : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vbool<K> invalid = abs(UVW) < min_rcp_input;
- const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
- vfloat<K> u = U * rcpUVW;
- vfloat<K> v = V * rcpUVW;
- mapUV(u,v);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> UVW;
- const vfloat<K> t;
- const Vec3vf<K> Ng;
- const UVMapper& mapUV;
- };
-
- template<int M, int K>
- struct PlueckerIntersectorK
- {
- __forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename UVMapper, typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- PlueckerHitK<K,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<typename UVMapper, typename Epilog>
- __forceinline bool intersect(RayK<K>& ray, size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
deleted file mode 100644
index 63e649d8fb..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
+++ /dev/null
@@ -1,418 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-/*! This intersector implements a modified version of the Woop's ray-triangle intersection test */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct WoopHitM
- {
- __forceinline WoopHitM() {}
-
- __forceinline WoopHitM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& T,
- const vfloat<M>& inv_det,
- const Vec3vf<M>& Ng)
- : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- vt = T;
- vu = U*inv_det;
- vv = V*inv_det;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- const vfloat<M> U;
- const vfloat<M> V;
- const vfloat<M> T;
- const vfloat<M> inv_det;
-
- public:
- const vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct WoopPrecalculations1
- {
- unsigned int kx,ky,kz;
- Vec3vf<M> org;
- Vec3fa S;
- __forceinline WoopPrecalculations1() {}
-
- __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr)
- {
- kz = maxDim(abs(ray.dir));
- kx = (kz+1) % 3;
- ky = (kx+1) % 3;
- const float inv_dir_kz = rcp(ray.dir[kz]);
- if (ray.dir[kz]) std::swap(kx,ky);
- S.x = ray.dir[kx] * inv_dir_kz;
- S.y = ray.dir[ky] * inv_dir_kz;
- S.z = inv_dir_kz;
- org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]);
- }
- };
-
-
- template<int M>
- struct WoopIntersector1
- {
-
- typedef WoopPrecalculations1<M> Precalculations;
-
- __forceinline WoopIntersector1() {}
-
- __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {}
-
- static __forceinline bool intersect(const vbool<M>& valid0,
- Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- WoopHitM<M>& hit)
- {
- vbool<M> valid = valid0;
-
- /* vertices relative to ray origin */
- const Vec3vf<M> org = Vec3vf<M>(pre.org.x,pre.org.y,pre.org.z);
- const Vec3vf<M> A = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - org;
- const Vec3vf<M> B = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - org;
- const Vec3vf<M> C = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - org;
-
- /* shear and scale vertices */
- const vfloat<M> Ax = nmadd(A.z,pre.S.x,A.x);
- const vfloat<M> Ay = nmadd(A.z,pre.S.y,A.y);
- const vfloat<M> Bx = nmadd(B.z,pre.S.x,B.x);
- const vfloat<M> By = nmadd(B.z,pre.S.y,B.y);
- const vfloat<M> Cx = nmadd(C.z,pre.S.x,C.x);
- const vfloat<M> Cy = nmadd(C.z,pre.S.y,C.y);
-
- /* scaled barycentric */
- const vfloat<M> U0 = Cx*By;
- const vfloat<M> U1 = Cy*Bx;
- const vfloat<M> V0 = Ax*Cy;
- const vfloat<M> V1 = Ay*Cx;
- const vfloat<M> W0 = Bx*Ay;
- const vfloat<M> W1 = By*Ax;
-#if !defined(__AVX512F__)
- valid &= (U0 >= U1) & (V0 >= V1) & (W0 >= W1) |
- (U0 <= U1) & (V0 <= V1) & (W0 <= W1);
-#else
- valid &= ge(ge(U0 >= U1,V0,V1),W0,W1) | le(le(U0 <= U1,V0,V1),W0,W1);
-#endif
-
- if (likely(none(valid))) return false;
- const vfloat<M> U = U0-U1;
- const vfloat<M> V = V0-V1;
- const vfloat<M> W = W0-W1;
-
- const vfloat<M> det = U+V+W;
-
- valid &= det != 0.0f;
- const vfloat<M> inv_det = rcp(det);
-
- const vfloat<M> Az = pre.S.z * A.z;
- const vfloat<M> Bz = pre.S.z * B.z;
- const vfloat<M> Cz = pre.S.z * C.z;
- const vfloat<M> T = madd(U,Az,madd(V,Bz,W*Cz));
- const vfloat<M> t = T * inv_det;
- /* perform depth test */
- valid &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar));
- if (likely(none(valid))) return false;
-
- const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1);
-
- /* update hit information */
- new (&hit) WoopHitM<M>(valid,U,V,t,inv_det,tri_Ng);
- return true;
- }
-
- static __forceinline bool intersect(Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- WoopHitM<M>& hit)
- {
- vbool<M> valid = true;
- return intersect(valid,ray,pre,v0,v1,v2,hit);
- }
-
-
- template<typename Epilog>
- static __forceinline bool intersect(Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog)
- {
- WoopHitM<M> hit;
- if (likely(intersect(ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog)
- {
- WoopHitM<M> hit;
- if (likely(intersect(valid,ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
- };
-
-#if 0
- template<int K>
- struct WoopHitK
- {
- __forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
- : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u = U * rcpAbsDen;
- const vfloat<K> v = V * rcpAbsDen;
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const Vec3vf<K> Ng;
- };
-
- template<int M, int K>
- struct WoopIntersectorK
- {
- __forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- //RayK<K>& ray,
- const Vec3vf<K>& ray_org,
- const Vec3vf<K>& ray_dir,
- const vfloat<K>& ray_tnear,
- const vfloat<K>& ray_tfar,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray_org;
- const Vec3vf<K> R = cross(C,ray_dir);
- const vfloat<K> den = dot(tri_Ng,ray_dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- WoopHitK<K> hit(U,V,T,absDen,tri_Ng);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- WoopHitM<M>& hit) const
- {
- /* calculate denominator */
- typedef Vec3vf<M> Vec3vfM;
- const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
-
- const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vfM C = Vec3vfM(tri_v0) - O;
- const Vec3vfM R = cross(C,D);
- const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
- const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- WoopHitM<M>& hit) const
- {
- if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
- {
- hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
- hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
- return any(hit.valid);
- }
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- WoopHitM<M> hit;
- if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- WoopHitM<M> hit;
- if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,v0,e1,e2,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
- }
- };
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
deleted file mode 100644
index 91b35c36f3..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primitive.h"
-
-namespace embree
-{
- namespace isa
- {
- struct TriangleTriangleIntersector
- {
- __forceinline static float T(float pa0, float pa1, float da0, float da1) {
- return pa0 + (pa1-pa0)*da0/(da0-da1);
- }
-
- __forceinline static bool point_line_side(const Vec2f& p, const Vec2f& a0, const Vec2f& a1) {
- return det(p-a0,a0-a1) >= 0.0f;
- }
-
- __forceinline static bool point_inside_triangle(const Vec2f& p, const Vec2f& a, const Vec2f& b, const Vec2f& c)
- {
- const bool pab = point_line_side(p,a,b);
- const bool pbc = point_line_side(p,b,c);
- const bool pca = point_line_side(p,c,a);
- return pab == pbc && pab == pca;
- }
-
- __forceinline static bool intersect_line_line(const Vec2f& a0, const Vec2f& a1, const Vec2f& b0, const Vec2f& b1)
- {
- const bool different_sides0 = point_line_side(b0,a0,a1) != point_line_side(b1,a0,a1);
- const bool different_sides1 = point_line_side(a0,b0,b1) != point_line_side(a1,b0,b1);
- return different_sides0 && different_sides1;
- }
-
- __forceinline static bool intersect_triangle_triangle (const Vec2f& a0, const Vec2f& a1, const Vec2f& a2,
- const Vec2f& b0, const Vec2f& b1, const Vec2f& b2)
- {
- const bool a01_b01 = intersect_line_line(a0,a1,b0,b1);
- if (a01_b01) return true;
- const bool a01_b12 = intersect_line_line(a0,a1,b1,b2);
- if (a01_b12) return true;
- const bool a01_b20 = intersect_line_line(a0,a1,b2,b0);
- if (a01_b20) return true;
- const bool a12_b01 = intersect_line_line(a1,a2,b0,b1);
- if (a12_b01) return true;
- const bool a12_b12 = intersect_line_line(a1,a2,b1,b2);
- if (a12_b12) return true;
- const bool a12_b20 = intersect_line_line(a1,a2,b2,b0);
- if (a12_b20) return true;
- const bool a20_b01 = intersect_line_line(a2,a0,b0,b1);
- if (a20_b01) return true;
- const bool a20_b12 = intersect_line_line(a2,a0,b1,b2);
- if (a20_b12) return true;
- const bool a20_b20 = intersect_line_line(a2,a0,b2,b0);
- if (a20_b20) return true;
-
- bool a_in_b = point_inside_triangle(a0,b0,b1,b2) && point_inside_triangle(a1,b0,b1,b2) && point_inside_triangle(a2,b0,b1,b2);
- if (a_in_b) return true;
-
- bool b_in_a = point_inside_triangle(b0,a0,a1,a2) && point_inside_triangle(b1,a0,a1,a2) && point_inside_triangle(b2,a0,a1,a2);
- if (b_in_a) return true;
-
- return false;
- }
-
- static bool intersect_triangle_triangle (const Vec3fa& a0, const Vec3fa& a1, const Vec3fa& a2,
- const Vec3fa& b0, const Vec3fa& b1, const Vec3fa& b2)
- {
- const float eps = 1E-5f;
-
- /* calculate triangle planes */
- const Vec3fa Na = cross(a1-a0,a2-a0);
- const float Ca = dot(Na,a0);
- const Vec3fa Nb = cross(b1-b0,b2-b0);
- const float Cb = dot(Nb,b0);
-
- /* project triangle A onto plane B */
- const float da0 = dot(Nb,a0)-Cb;
- const float da1 = dot(Nb,a1)-Cb;
- const float da2 = dot(Nb,a2)-Cb;
- if (max(da0,da1,da2) < -eps) return false;
- if (min(da0,da1,da2) > +eps) return false;
- //CSTAT(bvh_collide_prim_intersections4++);
-
- /* project triangle B onto plane A */
- const float db0 = dot(Na,b0)-Ca;
- const float db1 = dot(Na,b1)-Ca;
- const float db2 = dot(Na,b2)-Ca;
- if (max(db0,db1,db2) < -eps) return false;
- if (min(db0,db1,db2) > +eps) return false;
- //CSTAT(bvh_collide_prim_intersections5++);
-
- if (unlikely((std::fabs(da0) < eps && std::fabs(da1) < eps && std::fabs(da2) < eps) ||
- (std::fabs(db0) < eps && std::fabs(db1) < eps && std::fabs(db2) < eps)))
- {
- const size_t dz = maxDim(Na);
- const size_t dx = (dz+1)%3;
- const size_t dy = (dx+1)%3;
- const Vec2f A0(a0[dx],a0[dy]);
- const Vec2f A1(a1[dx],a1[dy]);
- const Vec2f A2(a2[dx],a2[dy]);
- const Vec2f B0(b0[dx],b0[dy]);
- const Vec2f B1(b1[dx],b1[dy]);
- const Vec2f B2(b2[dx],b2[dy]);
- return intersect_triangle_triangle(A0,A1,A2,B0,B1,B2);
- }
-
- const Vec3fa D = cross(Na,Nb);
- const float pa0 = dot(D,a0);
- const float pa1 = dot(D,a1);
- const float pa2 = dot(D,a2);
- const float pb0 = dot(D,b0);
- const float pb1 = dot(D,b1);
- const float pb2 = dot(D,b2);
-
- BBox1f ba = empty;
- if (min(da0,da1) <= 0.0f && max(da0,da1) >= 0.0f && abs(da0-da1) > 0.0f) ba.extend(T(pa0,pa1,da0,da1));
- if (min(da1,da2) <= 0.0f && max(da1,da2) >= 0.0f && abs(da1-da2) > 0.0f) ba.extend(T(pa1,pa2,da1,da2));
- if (min(da2,da0) <= 0.0f && max(da2,da0) >= 0.0f && abs(da2-da0) > 0.0f) ba.extend(T(pa2,pa0,da2,da0));
-
- BBox1f bb = empty;
- if (min(db0,db1) <= 0.0f && max(db0,db1) >= 0.0f && abs(db0-db1) > 0.0f) bb.extend(T(pb0,pb1,db0,db1));
- if (min(db1,db2) <= 0.0f && max(db1,db2) >= 0.0f && abs(db1-db2) > 0.0f) bb.extend(T(pb1,pb2,db1,db2));
- if (min(db2,db0) <= 0.0f && max(db2,db0) >= 0.0f && abs(db2-db0) > 0.0f) bb.extend(T(pb2,pb0,db2,db0));
-
- return conjoint(ba,bb);
- }
- };
- }
-}
-
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
deleted file mode 100644
index 4f3118cc0c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
+++ /dev/null
@@ -1,442 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- /* Stores M triangles from an indexed face set */
- template <int M>
- struct TriangleMi
- {
- /* Virtual interface to query information about the triangle type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline TriangleMi(const vuint<M>& v0,
- const vuint<M>& v1,
- const vuint<M>& v2,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
-#if defined(EMBREE_COMPACT_POLYS)
- : geomIDs(geomIDs), primIDs(primIDs) {}
-#else
- : v0_(v0), v1_(v1), v2_(v2), geomIDs(geomIDs), primIDs(primIDs) {}
-#endif
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M> geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M> primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangles */
- __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++) {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- bounds.extend(mesh->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime) {
- return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Non-temporal store */
- __forceinline static void store_nt(TriangleMi* dst, const TriangleMi& src)
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M>::store_nt(&dst->v0_,src.v0_);
- vuint<M>::store_nt(&dst->v1_,src.v1_);
- vuint<M>::store_nt(&dst->v2_,src.v2_);
-#endif
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> v0 = zero, v1 = zero, v2 = zero;
- vuint<M> geomID = -1, primID = -1;
- const PrimRefT* prim = &prims[begin];
-
- for (size_t i=0; i<M; i++)
- {
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
-#if !defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(prim->geomID());
- const TriangleMesh::Triangle& tri = mesh->triangle(prim->primID());
- unsigned int int_stride = mesh->vertices0.getStride()/4;
- v0[i] = tri.v[0] * int_stride;
- v1[i] = tri.v[1] * int_stride;
- v2[i] = tri.v[2] * int_stride;
-#endif
- begin++;
- } else {
- assert(i);
- if (likely(i > 0)) {
- geomID[i] = geomID[0];
- primID[i] = -1;
- v0[i] = v0[0];
- v1[i] = v0[0];
- v2[i] = v0[0];
- }
- }
- if (begin<end) prim = &prims[begin];
- }
- new (this) TriangleMi(v0,v1,v2,geomID,primID); // FIXME: use non temporal store
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M; i++)
- {
- if (primID(i) == -1) break;
- const unsigned int primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- }
- return bounds;
- }
-
- protected:
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M> v0_; // 4 byte offset of 1st vertex
- vuint<M> v1_; // 4 byte offset of 2nd vertex
- vuint<M> v2_; // 4 byte offset of 3rd vertex
-#endif
- vuint<M> geomIDs; // geometry ID of mesh
- vuint<M> primIDs; // primitive ID of primitive inside mesh
- };
-
- namespace isa
- {
-
- template<int M>
- struct TriangleMi : public embree::TriangleMi<M>
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- using embree::TriangleMi<M>::v0_;
- using embree::TriangleMi<M>::v1_;
- using embree::TriangleMi<M>::v2_;
-#endif
- using embree::TriangleMi<M>::geomIDs;
- using embree::TriangleMi<M>::primIDs;
- using embree::TriangleMi<M>::geomID;
- using embree::TriangleMi<M>::primID;
- using embree::TriangleMi<M>::valid;
-
- /* loads a single vertex */
- template<int vid>
- __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- return (Vec3f) mesh->vertices[0][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices = scene->vertices[geomID(index)];
- return (Vec3f&) vertices[v[index]];
-#endif
- }
-
- template<int vid, typename T>
- __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- const Vec3fa v0 = mesh->vertices[itime+0][tri.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime+1][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- const Vec3<T> p0(v0.x,v0.y,v0.z);
- const Vec3<T> p1(v1.x,v1.y,v1.z);
- return lerp(p0,p1,ftime);
- }
-
- template<int vid, int K, typename T>
- __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
- {
- Vec3<T> p0, p1;
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
-
- for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- const Vec3fa v0 = mesh->vertices[itime[i]+0][tri.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime[i]+1][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
- p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
- }
- return (T(one)-ftime)*p0 + ftime*p1;
- }
-
- struct Triangle {
- vfloat4 v0,v1,v2;
- };
-
-#if defined(EMBREE_COMPACT_POLYS)
-
- __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero };
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices0[tri.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices0[tri.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices0[tri.v[2]];
- return { v0, v1, v2 };
- }
-
- __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
- {
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero };
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices[itime][tri.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices[itime][tri.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices[itime][tri.v[2]];
- return { v0, v1, v2 };
- }
-
-#else
-
- __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
- {
- const float* vertices = scene->vertices[geomID(i)];
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- return { v0, v1, v2 };
- }
-
- __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
- {
- const float* vertices = (const float*) mesh->vertexPtr(0,itime);
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- return { v0, v1, v2 };
- }
-
-#endif
-
- /* Gather the triangles */
- __forceinline void gather(Vec3vf<M>& p0, Vec3vf<M>& p1, Vec3vf<M>& p2, const Scene* const scene) const;
-
- template<int K>
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
- __noinline
-#else
- __forceinline
-#endif
- void gather(const vbool<K>& valid,
- Vec3vf<K>& p0,
- Vec3vf<K>& p1,
- Vec3vf<K>& p2,
- const size_t index,
- const Scene* const scene,
- const vfloat<K>& time) const
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(time, ftime);
-
- const size_t first = bsf(movemask(valid));
- if (likely(all(valid,itime[first] == itime)))
- {
- p0 = getVertex<0>(index, scene, itime[first], ftime);
- p1 = getVertex<1>(index, scene, itime[first], ftime);
- p2 = getVertex<2>(index, scene, itime[first], ftime);
- } else {
- p0 = getVertex<0>(valid, index, scene, itime, ftime);
- p1 = getVertex<1>(valid, index, scene, itime, ftime);
- p2 = getVertex<2>(valid, index, scene, itime, ftime);
- }
- }
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- const TriangleMesh* mesh,
- const Scene *const scene,
- const int itime) const;
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- const Scene *const scene,
- const float time) const;
-
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<int N> const vuint<M>& getVertexOffset() const;
-#endif
- };
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<0>() const { return v0_; }
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<1>() const { return v1_; }
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<2>() const { return v2_; }
-#endif
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const Scene* const scene) const
- {
- const Triangle tri0 = loadTriangle(0,scene);
- const Triangle tri1 = loadTriangle(1,scene);
- const Triangle tri2 = loadTriangle(2,scene);
- const Triangle tri3 = loadTriangle(3,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- }
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const TriangleMesh* mesh,
- const Scene *const scene,
- const int itime) const
- {
- const Triangle tri0 = loadTriangle(0,itime,mesh);
- const Triangle tri1 = loadTriangle(1,itime,mesh);
- const Triangle tri2 = loadTriangle(2,itime,mesh);
- const Triangle tri3 = loadTriangle(3,itime,mesh);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- }
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const Scene *const scene,
- const float time) const
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(0)); // in mblur mode all geometries are identical
-
- float ftime;
- const int itime = mesh->timeSegment(time, ftime);
-
- Vec3vf4 a0,a1,a2; gather(a0,a1,a2,mesh,scene,itime);
- Vec3vf4 b0,b1,b2; gather(b0,b1,b2,mesh,scene,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- p2 = lerp(a2,b2,vfloat4(ftime));
- }
- }
-
- template<int M>
- typename TriangleMi<M>::Type TriangleMi<M>::type;
-
- typedef TriangleMi<4> Triangle4i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h
deleted file mode 100644
index e2f106a62c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h
+++ /dev/null
@@ -1,336 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "trianglei.h"
-#include "triangle_intersector_moeller.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiIntersector1Moeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiIntersectorKMoeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- const Scene* scene = context->scene;
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
- const Scene* scene = context->scene;
-
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiIntersector1Pluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiIntersectorKPluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- const Scene* scene = context->scene;
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
- const Scene* scene = context->scene;
-
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiMBIntersector1Moeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiMBIntersectorKMoeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiMBIntersector1Pluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiMBIntersectorKPluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev.h
deleted file mode 100644
index 19af389e73..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Stores the vertices of M triangles in struct of array layout */
- template <int M>
- struct TriangleMv
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleMv() {}
-
- /* Construction from vertices and IDs */
- __forceinline TriangleMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(v0), v1(v1), v2(v2), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns true if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangles */
- __forceinline BBox3fa bounds() const
- {
- Vec3vf<M> lower = min(v0,v1,v2);
- Vec3vf<M> upper = max(v0,v1,v2);
- vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Non temporal store */
- __forceinline static void store_nt(TriangleMv* dst, const TriangleMv& src)
- {
- vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
- vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
- vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
- vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
- vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
- vfloat<M>::store_nt(&dst->v1.z,src.v1.z);
- vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
- vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
- vfloat<M>::store_nt(&dst->v2.z,src.v2.z);
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleMv::store_nt(this,TriangleMv(v0,v1,v2,vgeomID,vprimID));
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (primID(i) == -1) break;
- const unsigned geomId = geomID(i);
- const unsigned primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- vgeomID [i] = geomId;
- vprimID [i] = primId;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- new (this) TriangleMv(v0,v1,v2,vgeomID,vprimID);
- return bounds;
- }
-
- public:
- Vec3vf<M> v0; // 1st vertex of the triangles
- Vec3vf<M> v1; // 2nd vertex of the triangles
- Vec3vf<M> v2; // 3rd vertex of the triangles
- private:
- vuint<M> geomIDs; // geometry ID
- vuint<M> primIDs; // primitive ID
- };
-
- template<int M>
- typename TriangleMv<M>::Type TriangleMv<M>::type;
-
- typedef TriangleMv<4> Triangle4v;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h
deleted file mode 100644
index 6af0d5a11c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "triangle_intersector_pluecker.h"
-#include "triangle_intersector_moeller.h"
-#include "triangle_intersector_woop.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Moeller
- {
- typedef TriangleMv<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
-
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Woop
- {
- typedef TriangleMv<M> Primitive;
- typedef WoopIntersector1<Mx> intersec;
- typedef WoopPrecalculations1<M> Precalculations;
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvIntersectorKMoeller
- {
- typedef TriangleMv<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
- };
-
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Pluecker
- {
- typedef TriangleMv<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvIntersectorKPluecker
- {
- typedef TriangleMv<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h
deleted file mode 100644
index 63137aee16..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h
+++ /dev/null
@@ -1,201 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Stores the vertices of M triangles in struct of array layout */
- template<int M>
- struct TriangleMvMB
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
-
- static Type type;
-
- public:
-
- /* primitive supports single time segments */
- static const bool singleTimeSegment = true;
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleMvMB() {}
-
- /* Construction from vertices and IDs */
- __forceinline TriangleMvMB(const Vec3vf<M>& a0, const Vec3vf<M>& a1,
- const Vec3vf<M>& b0, const Vec3vf<M>& b1,
- const Vec3vf<M>& c0, const Vec3vf<M>& c1,
- const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(a0), v1(b0), v2(c0), dv0(a1-a0), dv1(b1-b0), dv2(c1-c0), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangles at t0 */
- __forceinline BBox3fa bounds0() const
- {
- Vec3vf<M> lower = min(v0,v1,v2);
- Vec3vf<M> upper = max(v0,v1,v2);
- const vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Calculate the bounds of the triangles at t1 */
- __forceinline BBox3fa bounds1() const
- {
- const Vec3vf<M> p0 = v0+dv0;
- const Vec3vf<M> p1 = v1+dv1;
- const Vec3vf<M> p2 = v2+dv2;
- Vec3vf<M> lower = min(p0,p1,p2);
- Vec3vf<M> upper = max(p0,p1,p2);
- const vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds() const {
- return LBBox3fa(bounds0(),bounds1());
- }
-
- /* Fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero;
- Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero;
-
- BBox3fa bounds0 = empty;
- BBox3fa bounds1 = empty;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& a0 = mesh->vertex(tri.v[0],itime+0); bounds0.extend(a0);
- const Vec3fa& a1 = mesh->vertex(tri.v[0],itime+1); bounds1.extend(a1);
- const Vec3fa& b0 = mesh->vertex(tri.v[1],itime+0); bounds0.extend(b0);
- const Vec3fa& b1 = mesh->vertex(tri.v[1],itime+1); bounds1.extend(b1);
- const Vec3fa& c0 = mesh->vertex(tri.v[2],itime+0); bounds0.extend(c0);
- const Vec3fa& c1 = mesh->vertex(tri.v[2],itime+1); bounds1.extend(c1);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- va0.x[i] = a0.x; va0.y[i] = a0.y; va0.z[i] = a0.z;
- va1.x[i] = a1.x; va1.y[i] = a1.y; va1.z[i] = a1.z;
- vb0.x[i] = b0.x; vb0.y[i] = b0.y; vb0.z[i] = b0.z;
- vb1.x[i] = b1.x; vb1.y[i] = b1.y; vb1.z[i] = b1.z;
- vc0.x[i] = c0.x; vc0.y[i] = c0.y; vc0.z[i] = c0.z;
- vc1.x[i] = c1.x; vc1.y[i] = c1.y; vc1.z[i] = c1.z;
- }
- new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID);
- return LBBox3fa(bounds0,bounds1);
- }
-
- /* Fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero;
- Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero;
-
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRefMB& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* const mesh = scene->get<TriangleMesh>(geomID);
- const range<int> itime_range = mesh->timeSegmentRange(time_range);
- assert(itime_range.size() == 1);
- const int ilower = itime_range.begin();
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- allBounds.extend(mesh->linearBounds(primID, time_range));
- const Vec3fa& a0 = mesh->vertex(tri.v[0],ilower+0);
- const Vec3fa& a1 = mesh->vertex(tri.v[0],ilower+1);
- const Vec3fa& b0 = mesh->vertex(tri.v[1],ilower+0);
- const Vec3fa& b1 = mesh->vertex(tri.v[1],ilower+1);
- const Vec3fa& c0 = mesh->vertex(tri.v[2],ilower+0);
- const Vec3fa& c1 = mesh->vertex(tri.v[2],ilower+1);
- const BBox1f time_range_v(mesh->timeStep(ilower+0),mesh->timeStep(ilower+1));
- auto a01 = globalLinear(std::make_pair(a0,a1),time_range_v);
- auto b01 = globalLinear(std::make_pair(b0,b1),time_range_v);
- auto c01 = globalLinear(std::make_pair(c0,c1),time_range_v);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- va0.x[i] = a01.first .x; va0.y[i] = a01.first .y; va0.z[i] = a01.first .z;
- va1.x[i] = a01.second.x; va1.y[i] = a01.second.y; va1.z[i] = a01.second.z;
- vb0.x[i] = b01.first .x; vb0.y[i] = b01.first .y; vb0.z[i] = b01.first .z;
- vb1.x[i] = b01.second.x; vb1.y[i] = b01.second.y; vb1.z[i] = b01.second.z;
- vc0.x[i] = c01.first .x; vc0.y[i] = c01.first .y; vc0.z[i] = c01.first .z;
- vc1.x[i] = c01.second.x; vc1.y[i] = c01.second.y; vc1.z[i] = c01.second.z;
- }
- new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID);
- return allBounds;
- }
-
- public:
- Vec3vf<M> v0; // 1st vertex of the triangles
- Vec3vf<M> v1; // 2nd vertex of the triangles
- Vec3vf<M> v2; // 3rd vertex of the triangles
- Vec3vf<M> dv0; // difference vector between time steps t0 and t1 for first vertex
- Vec3vf<M> dv1; // difference vector between time steps t0 and t1 for second vertex
- Vec3vf<M> dv2; // difference vector between time steps t0 and t1 for third vertex
- private:
- vuint<M> geomIDs; // geometry ID
- vuint<M> primIDs; // primitive ID
- };
-
- template<int M>
- typename TriangleMvMB<M>::Type TriangleMvMB<M>::type;
-
- typedef TriangleMvMB<4> Triangle4vMB;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h
deleted file mode 100644
index 35a260d826..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMvMBIntersector1Moeller
- {
- typedef TriangleMvMB<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvMBIntersectorKMoeller
- {
- typedef TriangleMvMB<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMvMBIntersector1Pluecker
- {
- typedef TriangleMvMB<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvMBIntersectorKPluecker
- {
- typedef TriangleMvMB<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0));
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/hash.h b/thirdparty/embree-aarch64/kernels/hash.h
deleted file mode 100644
index 4abbe203d6..0000000000
--- a/thirdparty/embree-aarch64/kernels/hash.h
+++ /dev/null
@@ -1,5 +0,0 @@
-
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#define RTC_HASH "6ef362f99af80c9dfe8dd2bfc582d9067897edc6"
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h b/thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h
deleted file mode 100644
index c0e78820f8..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h
+++ /dev/null
@@ -1,669 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/scene_curves.h"
-
-namespace embree
-{
- class BezierBasis
- {
- public:
-
- template<typename T>
- static __forceinline Vec4<T> eval(const T& u)
- {
- const T t1 = u;
- const T t0 = 1.0f-t1;
- const T B0 = t0 * t0 * t0;
- const T B1 = 3.0f * t1 * (t0 * t0);
- const T B2 = 3.0f * (t1 * t1) * t0;
- const T B3 = t1 * t1 * t1;
- return Vec4<T>(B0,B1,B2,B3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative(const T& u)
- {
- const T t1 = u;
- const T t0 = 1.0f-t1;
- const T B0 = -(t0*t0);
- const T B1 = madd(-2.0f,t0*t1,t0*t0);
- const T B2 = msub(+2.0f,t0*t1,t1*t1);
- const T B3 = +(t1*t1);
- return T(3.0f)*Vec4<T>(B0,B1,B2,B3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative2(const T& u)
- {
- const T t1 = u;
- const T t0 = 1.0f-t1;
- const T B0 = t0;
- const T B1 = madd(-2.0f,t0,t1);
- const T B2 = madd(-2.0f,t1,t0);
- const T B3 = t1;
- return T(6.0f)*Vec4<T>(B0,B1,B2,B3);
- }
- };
-
- struct PrecomputedBezierBasis
- {
- enum { N = 16 };
- public:
- PrecomputedBezierBasis() {}
- PrecomputedBezierBasis(int shift);
-
- /* basis for bezier evaluation */
- public:
- float c0[N+1][N+1];
- float c1[N+1][N+1];
- float c2[N+1][N+1];
- float c3[N+1][N+1];
-
- /* basis for bezier derivative evaluation */
- public:
- float d0[N+1][N+1];
- float d1[N+1][N+1];
- float d2[N+1][N+1];
- float d3[N+1][N+1];
- };
- extern PrecomputedBezierBasis bezier_basis0;
- extern PrecomputedBezierBasis bezier_basis1;
-
-
- template<typename V>
- struct LinearBezierCurve
- {
- V v0,v1;
-
- __forceinline LinearBezierCurve () {}
-
- __forceinline LinearBezierCurve (const LinearBezierCurve& other)
- : v0(other.v0), v1(other.v1) {}
-
- __forceinline LinearBezierCurve& operator= (const LinearBezierCurve& other) {
- v0 = other.v0; v1 = other.v1; return *this;
- }
-
- __forceinline LinearBezierCurve (const V& v0, const V& v1)
- : v0(v0), v1(v1) {}
-
- __forceinline V begin() const { return v0; }
- __forceinline V end () const { return v1; }
-
- bool hasRoot() const;
-
- friend embree_ostream operator<<(embree_ostream cout, const LinearBezierCurve& a) {
- return cout << "LinearBezierCurve (" << a.v0 << ", " << a.v1 << ")";
- }
- };
-
- template<> __forceinline bool LinearBezierCurve<Interval1f>::hasRoot() const {
- return numRoots(v0,v1);
- }
-
- template<typename V>
- struct QuadraticBezierCurve
- {
- V v0,v1,v2;
-
- __forceinline QuadraticBezierCurve () {}
-
- __forceinline QuadraticBezierCurve (const QuadraticBezierCurve& other)
- : v0(other.v0), v1(other.v1), v2(other.v2) {}
-
- __forceinline QuadraticBezierCurve& operator= (const QuadraticBezierCurve& other) {
- v0 = other.v0; v1 = other.v1; v2 = other.v2; return *this;
- }
-
- __forceinline QuadraticBezierCurve (const V& v0, const V& v1, const V& v2)
- : v0(v0), v1(v1), v2(v2) {}
-
- __forceinline V begin() const { return v0; }
- __forceinline V end () const { return v2; }
-
- __forceinline V interval() const {
- return merge(v0,v1,v2);
- }
-
- __forceinline BBox<V> bounds() const {
- return merge(BBox<V>(v0),BBox<V>(v1),BBox<V>(v2));
- }
-
- friend embree_ostream operator<<(embree_ostream cout, const QuadraticBezierCurve& a) {
- return cout << "QuadraticBezierCurve ( (" << a.u.lower << ", " << a.u.upper << "), " << a.v0 << ", " << a.v1 << ", " << a.v2 << ")";
- }
- };
-
-
- typedef QuadraticBezierCurve<float> QuadraticBezierCurve1f;
- typedef QuadraticBezierCurve<Vec2fa> QuadraticBezierCurve2fa;
- typedef QuadraticBezierCurve<Vec3fa> QuadraticBezierCurve3fa;
-
- template<typename Vertex>
- struct CubicBezierCurve
- {
- Vertex v0,v1,v2,v3;
-
- __forceinline CubicBezierCurve() {}
-
- template<typename T1>
- __forceinline CubicBezierCurve (const CubicBezierCurve<T1>& other)
- : v0(other.v0), v1(other.v1), v2(other.v2), v3(other.v3) {}
-
- __forceinline CubicBezierCurve& operator= (const CubicBezierCurve& other) {
- v0 = other.v0; v1 = other.v1; v2 = other.v2; v3 = other.v3; return *this;
- }
-
- __forceinline CubicBezierCurve(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
- : v0(v0), v1(v1), v2(v2), v3(v3) {}
-
- __forceinline Vertex begin() const {
- return v0;
- }
-
- __forceinline Vertex end() const {
- return v3;
- }
-
- __forceinline Vertex center() const {
- return 0.25f*(v0+v1+v2+v3);
- }
-
- __forceinline Vertex begin_direction() const {
- return v1-v0;
- }
-
- __forceinline Vertex end_direction() const {
- return v3-v2;
- }
-
- __forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const {
- return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
- }
-
- __forceinline CubicBezierCurve<vfloatx> vxfm(const Vertex& dx) const {
- return CubicBezierCurve<vfloatx>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
- }
-
- __forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const {
- return CubicBezierCurve<float>(dot(v0-p,dx),dot(v1-p,dx),dot(v2-p,dx),dot(v3-p,dx));
- }
-
- __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space) const
- {
- const Vec3fa q0 = xfmVector(space,v0);
- const Vec3fa q1 = xfmVector(space,v1);
- const Vec3fa q2 = xfmVector(space,v2);
- const Vec3fa q3 = xfmVector(space,v3);
- return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
- }
-
- __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const
- {
- const Vec3fa q0 = xfmVector(space,v0-p);
- const Vec3fa q1 = xfmVector(space,v1-p);
- const Vec3fa q2 = xfmVector(space,v2-p);
- const Vec3fa q3 = xfmVector(space,v3-p);
- return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
- }
-
- __forceinline CubicBezierCurve<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
- {
- const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
- const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
- const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
- const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
- return CubicBezierCurve<Vec3ff>(q0,q1,q2,q3);
- }
-
- __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const
- {
- const Vec3fa q0 = xfmVector(space,s*(v0-p));
- const Vec3fa q1 = xfmVector(space,s*(v1-p));
- const Vec3fa q2 = xfmVector(space,s*(v2-p));
- const Vec3fa q3 = xfmVector(space,s*(v3-p));
- return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
- }
-
- __forceinline int maxRoots() const;
-
- __forceinline BBox<Vertex> bounds() const {
- return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
- }
-
- __forceinline friend CubicBezierCurve operator +( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
- return CubicBezierCurve(a.v0+b.v0,a.v1+b.v1,a.v2+b.v2,a.v3+b.v3);
- }
-
- __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
- return CubicBezierCurve(a.v0-b.v0,a.v1-b.v1,a.v2-b.v2,a.v3-b.v3);
- }
-
- __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const Vertex& b ) {
- return CubicBezierCurve(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
- }
-
- __forceinline friend CubicBezierCurve operator *( const Vertex& a, const CubicBezierCurve& b ) {
- return CubicBezierCurve(a*b.v0,a*b.v1,a*b.v2,a*b.v3);
- }
-
- __forceinline friend CubicBezierCurve cmadd( const Vertex& a, const CubicBezierCurve& b, const CubicBezierCurve& c) {
- return CubicBezierCurve(madd(a,b.v0,c.v0),madd(a,b.v1,c.v1),madd(a,b.v2,c.v2),madd(a,b.v3,c.v3));
- }
-
- __forceinline friend CubicBezierCurve clerp ( const CubicBezierCurve& a, const CubicBezierCurve& b, const Vertex& t ) {
- return cmadd((Vertex(1.0f)-t),a,t*b);
- }
-
- __forceinline friend CubicBezierCurve merge ( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
- return CubicBezierCurve(merge(a.v0,b.v0),merge(a.v1,b.v1),merge(a.v2,b.v2),merge(a.v3,b.v3));
- }
-
- __forceinline void split(CubicBezierCurve& left, CubicBezierCurve& right, const float t = 0.5f) const
- {
- const Vertex p00 = v0;
- const Vertex p01 = v1;
- const Vertex p02 = v2;
- const Vertex p03 = v3;
-
- const Vertex p10 = lerp(p00,p01,t);
- const Vertex p11 = lerp(p01,p02,t);
- const Vertex p12 = lerp(p02,p03,t);
- const Vertex p20 = lerp(p10,p11,t);
- const Vertex p21 = lerp(p11,p12,t);
- const Vertex p30 = lerp(p20,p21,t);
-
- new (&left ) CubicBezierCurve(p00,p10,p20,p30);
- new (&right) CubicBezierCurve(p30,p21,p12,p03);
- }
-
- __forceinline CubicBezierCurve<Vec2vfx> split() const
- {
- const float u0 = 0.0f, u1 = 1.0f;
- const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
- Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
- const Vec2vfx P3 = shift_right_1(P0);
- const Vec2vfx dP3du = shift_right_1(dP0du);
- const Vec2vfx P1 = P0 + dP0du;
- const Vec2vfx P2 = P3 - dP3du;
- return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
- }
-
- __forceinline CubicBezierCurve<Vec2vfx> split(const BBox1f& u) const
- {
- const float u0 = u.lower, u1 = u.upper;
- const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
- Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
- const Vec2vfx P3 = shift_right_1(P0);
- const Vec2vfx dP3du = shift_right_1(dP0du);
- const Vec2vfx P1 = P0 + dP0du;
- const Vec2vfx P2 = P3 - dP3du;
- return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
- }
-
- __forceinline void eval(float t, Vertex& p, Vertex& dp) const
- {
- const Vertex p00 = v0;
- const Vertex p01 = v1;
- const Vertex p02 = v2;
- const Vertex p03 = v3;
-
- const Vertex p10 = lerp(p00,p01,t);
- const Vertex p11 = lerp(p01,p02,t);
- const Vertex p12 = lerp(p02,p03,t);
- const Vertex p20 = lerp(p10,p11,t);
- const Vertex p21 = lerp(p11,p12,t);
- const Vertex p30 = lerp(p20,p21,t);
-
- p = p30;
- dp = Vertex(3.0f)*(p21-p20);
- }
-
-#if 0
- __forceinline Vertex eval(float t) const
- {
- const Vertex p00 = v0;
- const Vertex p01 = v1;
- const Vertex p02 = v2;
- const Vertex p03 = v3;
-
- const Vertex p10 = lerp(p00,p01,t);
- const Vertex p11 = lerp(p01,p02,t);
- const Vertex p12 = lerp(p02,p03,t);
- const Vertex p20 = lerp(p10,p11,t);
- const Vertex p21 = lerp(p11,p12,t);
- const Vertex p30 = lerp(p20,p21,t);
-
- return p30;
- }
-#else
- __forceinline Vertex eval(const float t) const
- {
- const Vec4<float> b = BezierBasis::eval(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-#endif
-
- __forceinline Vertex eval_dt(float t) const
- {
- const Vertex p00 = v1-v0;
- const Vertex p01 = v2-v1;
- const Vertex p02 = v3-v2;
- const Vertex p10 = lerp(p00,p01,t);
- const Vertex p11 = lerp(p01,p02,t);
- const Vertex p20 = lerp(p10,p11,t);
- return Vertex(3.0f)*p20;
- }
-
- __forceinline Vertex eval_du(const float t) const
- {
- const Vec4<float> b = BezierBasis::derivative(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline Vertex eval_dudu(const float t) const
- {
- const Vec4<float> b = BezierBasis::derivative2(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline void evalN(const vfloatx& t, Vec2vfx& p, Vec2vfx& dp) const
- {
- const Vec2vfx p00 = v0;
- const Vec2vfx p01 = v1;
- const Vec2vfx p02 = v2;
- const Vec2vfx p03 = v3;
-
- const Vec2vfx p10 = lerp(p00,p01,t);
- const Vec2vfx p11 = lerp(p01,p02,t);
- const Vec2vfx p12 = lerp(p02,p03,t);
-
- const Vec2vfx p20 = lerp(p10,p11,t);
- const Vec2vfx p21 = lerp(p11,p12,t);
-
- const Vec2vfx p30 = lerp(p20,p21,t);
-
- p = p30;
- dp = vfloatx(3.0f)*(p21-p20);
- }
-
- __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
- {
- const Vertex p00 = v0;
- const Vertex p01 = v1;
- const Vertex p02 = v2;
- const Vertex p03 = v3;
- const Vertex p10 = lerp(p00,p01,t);
- const Vertex p11 = lerp(p01,p02,t);
- const Vertex p12 = lerp(p02,p03,t);
- const Vertex p20 = lerp(p10,p11,t);
- const Vertex p21 = lerp(p11,p12,t);
- const Vertex p30 = lerp(p20,p21,t);
- p = p30;
- dp = 3.0f*(p21-p20);
- ddp = eval_dudu(t);
- }
-
- __forceinline CubicBezierCurve clip(const Interval1f& u1) const
- {
- Vertex f0,df0; eval(u1.lower,f0,df0);
- Vertex f1,df1; eval(u1.upper,f1,df1);
- float s = u1.upper-u1.lower;
- return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
- }
-
- __forceinline QuadraticBezierCurve<Vertex> derivative() const
- {
- const Vertex q0 = 3.0f*(v1-v0);
- const Vertex q1 = 3.0f*(v2-v1);
- const Vertex q2 = 3.0f*(v3-v2);
- return QuadraticBezierCurve<Vertex>(q0,q1,q2);
- }
-
- __forceinline BBox<Vertex> derivative_bounds(const Interval1f& u1) const
- {
- Vertex f0,df0; eval(u1.lower,f0,df0);
- Vertex f3,df3; eval(u1.upper,f3,df3);
- const float s = u1.upper-u1.lower;
- const Vertex f1 = f0+s*(1.0f/3.0f)*df0;
- const Vertex f2 = f3-s*(1.0f/3.0f)*df3;
- const Vertex q0 = s*df0;
- const Vertex q1 = 3.0f*(f2-f1);
- const Vertex q2 = s*df3;
- return merge(BBox<Vertex>(q0),BBox<Vertex>(q1),BBox<Vertex>(q2));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BezierBasis::eval(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BezierBasis::derivative(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BezierBasis::derivative2(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
- {
- const Vec4vf<M> p00 = v0;
- const Vec4vf<M> p01 = v1;
- const Vec4vf<M> p02 = v2;
- const Vec4vf<M> p03 = v3;
-
- const Vec4vf<M> p10 = lerp(p00,p01,t);
- const Vec4vf<M> p11 = lerp(p01,p02,t);
- const Vec4vf<M> p12 = lerp(p02,p03,t);
- const Vec4vf<M> p20 = lerp(p10,p11,t);
- const Vec4vf<M> p21 = lerp(p11,p12,t);
- const Vec4vf<M> p30 = lerp(p20,p21,t);
-
- p = p30;
- dp = vfloat<M>(3.0f)*(p21-p20);
- }
-
- template<int M, typename Vec = Vec4vf<M>>
- __forceinline Vec eval0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBezierBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0),
- madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1),
- madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2),
- vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3))));
- }
-
- template<int M, typename Vec = Vec4vf<M>>
- __forceinline Vec eval1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBezierBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0),
- madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1),
- madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2),
- vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3))));
- }
-
- template<int M, typename Vec = Vec4vf<M>>
- __forceinline Vec derivative0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBezierBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0),
- madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1),
- madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2),
- vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3))));
- }
-
- template<int M, typename Vec = Vec4vf<M>>
- __forceinline Vec derivative1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBezierBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0),
- madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1),
- madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2),
- vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3))));
- }
-
- /* calculates bounds of bezier curve geometry */
- __forceinline BBox3fa accurateBounds() const
- {
- const int N = 7;
- const float scale = 1.0f/(3.0f*(N-1));
- Vec3vfx pl(pos_inf), pu(neg_inf);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vintx vi = vintx(i)+vintx(step);
- vboolx valid = vi <= vintx(N);
- const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
- const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
- const Vec3vfx pm = p-Vec3vfx(scale)*select(vi!=vintx(0),dp,Vec3vfx(zero));
- const Vec3vfx pp = p+Vec3vfx(scale)*select(vi!=vintx(N),dp,Vec3vfx(zero));
- pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
- pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- return BBox3fa(lower,upper);
- }
-
- /* calculates bounds of bezier curve geometry */
- __forceinline BBox3fa accurateRoundBounds() const
- {
- const int N = 7;
- const float scale = 1.0f/(3.0f*(N-1));
- Vec4vfx pl(pos_inf), pu(neg_inf);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vintx vi = vintx(i)+vintx(step);
- vboolx valid = vi <= vintx(N);
- const Vec4vfx p = eval0<VSIZEX>(i,N);
- const Vec4vfx dp = derivative0<VSIZEX>(i,N);
- const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
- const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
- pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
- pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const float r_min = reduce_min(pl.w);
- const float r_max = reduce_max(pu.w);
- const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
- return enlarge(BBox3fa(lower,upper),upper_r);
- }
-
- /* calculates bounds when tessellated into N line segments */
- __forceinline BBox3fa accurateFlatBounds(int N) const
- {
- if (likely(N == 4))
- {
- const Vec4vf4 pi = eval0<4>(0,4);
- const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
- const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
- const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
- return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
- }
- else
- {
- Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
- for (int i=0; i<N; i+=VSIZEX)
- {
- vboolx valid = vintx(i)+vintx(step) < vintx(N);
- const Vec4vfx pi = eval0<VSIZEX>(i,N);
-
- pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
- pl.y = select(valid,min(pl.y,pi.y),pl.y);
- pl.z = select(valid,min(pl.z,pi.z),pl.z);
-
- pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
- pu.y = select(valid,max(pu.y,pi.y),pu.y);
- pu.z = select(valid,max(pu.z,pi.z),pu.z);
-
- ru = select(valid,max(ru,abs(pi.w)),ru);
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const Vec3fa upper_r(reduce_max(ru));
- return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
- }
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const CubicBezierCurve& curve) {
- return cout << "CubicBezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
- }
- };
-
-#if defined(__AVX__)
- template<>
- __forceinline CubicBezierCurve<vfloat4> CubicBezierCurve<vfloat4>::clip(const Interval1f& u1) const
- {
- const vfloat8 p00 = vfloat8(v0);
- const vfloat8 p01 = vfloat8(v1);
- const vfloat8 p02 = vfloat8(v2);
- const vfloat8 p03 = vfloat8(v3);
-
- const vfloat8 t(vfloat4(u1.lower),vfloat4(u1.upper));
- const vfloat8 p10 = lerp(p00,p01,t);
- const vfloat8 p11 = lerp(p01,p02,t);
- const vfloat8 p12 = lerp(p02,p03,t);
- const vfloat8 p20 = lerp(p10,p11,t);
- const vfloat8 p21 = lerp(p11,p12,t);
- const vfloat8 p30 = lerp(p20,p21,t);
-
- const vfloat8 f01 = p30;
- const vfloat8 df01 = vfloat8(3.0f)*(p21-p20);
-
- const vfloat4 f0 = extract4<0>(f01), f1 = extract4<1>(f01);
- const vfloat4 df0 = extract4<0>(df01), df1 = extract4<1>(df01);
- const float s = u1.upper-u1.lower;
- return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
- }
-#endif
-
- template<typename Vertex> using BezierCurveT = CubicBezierCurve<Vertex>;
-
- typedef CubicBezierCurve<float> CubicBezierCurve1f;
- typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa;
- typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa;
- typedef CubicBezierCurve<Vec3fa> BezierCurve3fa;
-
- template<> __forceinline int CubicBezierCurve<float>::maxRoots() const
- {
- float eps = 1E-4f;
- bool neg0 = v0 <= 0.0f; bool zero0 = fabs(v0) < eps;
- bool neg1 = v1 <= 0.0f; bool zero1 = fabs(v1) < eps;
- bool neg2 = v2 <= 0.0f; bool zero2 = fabs(v2) < eps;
- bool neg3 = v3 <= 0.0f; bool zero3 = fabs(v3) < eps;
- return (neg0 != neg1 || zero0) + (neg1 != neg2 || zero1) + (neg2 != neg3 || zero2 || zero3);
- }
-
- template<> __forceinline int CubicBezierCurve<Interval1f>::maxRoots() const {
- return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3);
- }
-
- __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
- {
- return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h
deleted file mode 100644
index d87ed41ccb..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h
+++ /dev/null
@@ -1,372 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_patch.h"
-#include "bezier_curve.h"
-
-namespace embree
-{
- template<class T, class S>
- static __forceinline T deCasteljau(const S& uu, const T& v0, const T& v1, const T& v2, const T& v3)
- {
- const T v0_1 = lerp(v0,v1,uu);
- const T v1_1 = lerp(v1,v2,uu);
- const T v2_1 = lerp(v2,v3,uu);
- const T v0_2 = lerp(v0_1,v1_1,uu);
- const T v1_2 = lerp(v1_1,v2_1,uu);
- const T v0_3 = lerp(v0_2,v1_2,uu);
- return v0_3;
- }
-
- template<class T, class S>
- static __forceinline T deCasteljau_tangent(const S& uu, const T& v0, const T& v1, const T& v2, const T& v3)
- {
- const T v0_1 = lerp(v0,v1,uu);
- const T v1_1 = lerp(v1,v2,uu);
- const T v2_1 = lerp(v2,v3,uu);
- const T v0_2 = lerp(v0_1,v1_1,uu);
- const T v1_2 = lerp(v1_1,v2_1,uu);
- return S(3.0f)*(v1_2-v0_2);
- }
-
- template<typename Vertex>
- __forceinline Vertex computeInnerBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
- return 1.0f / 36.0f * (16.0f * v[y][x] + 4.0f * (v[y-1][x] + v[y+1][x] + v[y][x-1] + v[y][x+1]) + (v[y-1][x-1] + v[y+1][x+1] + v[y-1][x+1] + v[y+1][x-1]));
- }
-
- template<typename Vertex>
- __forceinline Vertex computeTopEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
- return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y-1][x] + 2.0f * (v[y][x-1] + v[y][x+1]) + (v[y-1][x-1] + v[y-1][x+1]));
- }
-
- template<typename Vertex>
- __forceinline Vertex computeBottomEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
- return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y+1][x] + 2.0f * (v[y][x-1] + v[y][x+1]) + v[y+1][x-1] + v[y+1][x+1]);
- }
-
- template<typename Vertex>
- __forceinline Vertex computeLeftEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
- return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y][x-1] + 2.0f * (v[y-1][x] + v[y+1][x]) + v[y-1][x-1] + v[y+1][x-1]);
- }
-
- template<typename Vertex>
- __forceinline Vertex computeRightEdgeBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x) {
- return 1.0f / 18.0f * (8.0f * v[y][x] + 4.0f * v[y][x+1] + 2.0f * (v[y-1][x] + v[y+1][x]) + v[y-1][x+1] + v[y+1][x+1]);
- }
-
- template<typename Vertex>
- __forceinline Vertex computeCornerBezierControlPoint(const Vertex v[4][4], const size_t y, const size_t x, const ssize_t delta_y, const ssize_t delta_x)
- {
- return 1.0f / 9.0f * (4.0f * v[y][x] + 2.0f * (v[y+delta_y][x] + v[y][x+delta_x]) + v[y+delta_y][x+delta_x]);
- }
-
- template<typename Vertex, typename Vertex_t>
- class __aligned(64) BezierPatchT
- {
- public:
- Vertex matrix[4][4];
-
- public:
-
- __forceinline BezierPatchT() {}
-
- __forceinline BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride);
-
- __forceinline BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch);
-
- __forceinline BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
- const BezierCurveT<Vertex>* border0,
- const BezierCurveT<Vertex>* border1,
- const BezierCurveT<Vertex>* border2,
- const BezierCurveT<Vertex>* border3);
-
- __forceinline BezierPatchT(const BSplinePatchT<Vertex,Vertex_t>& source)
- {
- /* compute inner bezier control points */
- matrix[0][0] = computeInnerBezierControlPoint(source.v,1,1);
- matrix[0][3] = computeInnerBezierControlPoint(source.v,1,2);
- matrix[3][3] = computeInnerBezierControlPoint(source.v,2,2);
- matrix[3][0] = computeInnerBezierControlPoint(source.v,2,1);
-
- /* compute top edge control points */
- matrix[0][1] = computeRightEdgeBezierControlPoint(source.v,1,1);
- matrix[0][2] = computeLeftEdgeBezierControlPoint(source.v,1,2);
-
- /* compute buttom edge control points */
- matrix[3][1] = computeRightEdgeBezierControlPoint(source.v,2,1);
- matrix[3][2] = computeLeftEdgeBezierControlPoint(source.v,2,2);
-
- /* compute left edge control points */
- matrix[1][0] = computeBottomEdgeBezierControlPoint(source.v,1,1);
- matrix[2][0] = computeTopEdgeBezierControlPoint(source.v,2,1);
-
- /* compute right edge control points */
- matrix[1][3] = computeBottomEdgeBezierControlPoint(source.v,1,2);
- matrix[2][3] = computeTopEdgeBezierControlPoint(source.v,2,2);
-
- /* compute corner control points */
- matrix[1][1] = computeCornerBezierControlPoint(source.v,1,1, 1, 1);
- matrix[1][2] = computeCornerBezierControlPoint(source.v,1,2, 1,-1);
- matrix[2][2] = computeCornerBezierControlPoint(source.v,2,2,-1,-1);
- matrix[2][1] = computeCornerBezierControlPoint(source.v,2,1,-1, 1);
- }
-
- static __forceinline Vertex_t bilinear(const Vec4f Bu, const Vertex matrix[4][4], const Vec4f Bv)
- {
- const Vertex_t M0 = madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3])));
- const Vertex_t M1 = madd(Bu.x,matrix[1][0],madd(Bu.y,matrix[1][1],madd(Bu.z,matrix[1][2],Bu.w * matrix[1][3])));
- const Vertex_t M2 = madd(Bu.x,matrix[2][0],madd(Bu.y,matrix[2][1],madd(Bu.z,matrix[2][2],Bu.w * matrix[2][3])));
- const Vertex_t M3 = madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])));
- return madd(Bv.x,M0,madd(Bv.y,M1,madd(Bv.z,M2,Bv.w*M3)));
- }
-
- static __forceinline Vertex_t eval(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::eval(uu);
- const Vec4f Bv = BezierBasis::eval(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t eval_du(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::derivative(uu);
- const Vec4f Bv = BezierBasis::eval(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t eval_dv(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::eval(uu);
- const Vec4f Bv = BezierBasis::derivative(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t eval_dudu(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::derivative2(uu);
- const Vec4f Bv = BezierBasis::eval(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t eval_dvdv(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::eval(uu);
- const Vec4f Bv = BezierBasis::derivative2(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t eval_dudv(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vec4f Bu = BezierBasis::derivative(uu);
- const Vec4f Bv = BezierBasis::derivative(vv);
- return bilinear(Bu,matrix,Bv);
- }
-
- static __forceinline Vertex_t normal(const Vertex matrix[4][4], const float uu, const float vv)
- {
- const Vertex_t dPdu = eval_du(matrix,uu,vv);
- const Vertex_t dPdv = eval_dv(matrix,uu,vv);
- return cross(dPdu,dPdv);
- }
-
- __forceinline Vertex_t normal(const float uu, const float vv)
- {
- const Vertex_t dPdu = eval_du(matrix,uu,vv);
- const Vertex_t dPdv = eval_dv(matrix,uu,vv);
- return cross(dPdu,dPdv);
- }
-
- __forceinline Vertex_t eval(const float uu, const float vv) const {
- return eval(matrix,uu,vv);
- }
-
- __forceinline Vertex_t eval_du(const float uu, const float vv) const {
- return eval_du(matrix,uu,vv);
- }
-
- __forceinline Vertex_t eval_dv(const float uu, const float vv) const {
- return eval_dv(matrix,uu,vv);
- }
-
- __forceinline Vertex_t eval_dudu(const float uu, const float vv) const {
- return eval_dudu(matrix,uu,vv);
- }
-
- __forceinline Vertex_t eval_dvdv(const float uu, const float vv) const {
- return eval_dvdv(matrix,uu,vv);
- }
-
- __forceinline Vertex_t eval_dudv(const float uu, const float vv) const {
- return eval_dudv(matrix,uu,vv);
- }
-
- __forceinline void eval(const float u, const float v, Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv, const float dscale = 1.0f) const
- {
- if (P) {
- *P = eval(u,v);
- }
- if (dPdu) {
- assert(dPdu); *dPdu = eval_du(u,v)*dscale;
- assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
- }
- if (ddPdudu) {
- assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
- assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
- assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
- }
- }
-
- template<class vfloat>
- __forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n) const
- {
- const vfloat curve0_x = v_n[0] * vfloat(matrix[0][0][i]) + v_n[1] * vfloat(matrix[1][0][i]) + v_n[2] * vfloat(matrix[2][0][i]) + v_n[3] * vfloat(matrix[3][0][i]);
- const vfloat curve1_x = v_n[0] * vfloat(matrix[0][1][i]) + v_n[1] * vfloat(matrix[1][1][i]) + v_n[2] * vfloat(matrix[2][1][i]) + v_n[3] * vfloat(matrix[3][1][i]);
- const vfloat curve2_x = v_n[0] * vfloat(matrix[0][2][i]) + v_n[1] * vfloat(matrix[1][2][i]) + v_n[2] * vfloat(matrix[2][2][i]) + v_n[3] * vfloat(matrix[3][2][i]);
- const vfloat curve3_x = v_n[0] * vfloat(matrix[0][3][i]) + v_n[1] * vfloat(matrix[1][3][i]) + v_n[2] * vfloat(matrix[2][3][i]) + v_n[3] * vfloat(matrix[3][3][i]);
- return u_n[0] * curve0_x + u_n[1] * curve1_x + u_n[2] * curve2_x + u_n[3] * curve3_x;
- }
-
- template<typename vbool, typename vfloat>
- __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
- const float dscale, const size_t dstride, const size_t N) const
- {
- if (P) {
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv,u_n,v_n));
- }
- if (dPdu)
- {
- {
- assert(dPdu);
- const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,dPdu+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
- }
- {
- assert(dPdv);
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,dPdv+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
- }
- }
- if (ddPdudu)
- {
- {
- assert(ddPdudu);
- const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudu+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- {
- assert(ddPdvdv);
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdvdv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- {
- assert(ddPdudv);
- const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- }
- }
-
- template<typename T>
- static __forceinline Vec3<T> eval(const Vertex matrix[4][4], const T& uu, const T& vv)
- {
- const T one_minus_uu = 1.0f - uu;
- const T one_minus_vv = 1.0f - vv;
-
- const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
- const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
- const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
- const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
- const T B2_u = 3.0f * (uu * one_minus_uu * uu);
- const T B2_v = 3.0f * (vv * one_minus_vv * vv);
- const T B3_u = uu * uu * uu;
- const T B3_v = vv * vv * vv;
-
- const T x =
- madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u*matrix[0][3].x))),
- madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,matrix[1][1].x,madd(B2_u,matrix[1][2].x,B3_u*matrix[1][3].x))),
- madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,matrix[2][1].x,madd(B2_u,matrix[2][2].x,B3_u*matrix[2][3].x))),
- B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u*matrix[3][3].x))))));
-
- const T y =
- madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u*matrix[0][3].y))),
- madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,matrix[1][1].y,madd(B2_u,matrix[1][2].y,B3_u*matrix[1][3].y))),
- madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,matrix[2][1].y,madd(B2_u,matrix[2][2].y,B3_u*matrix[2][3].y))),
- B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u*matrix[3][3].y))))));
-
- const T z =
- madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u*matrix[0][3].z))),
- madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,matrix[1][1].z,madd(B2_u,matrix[1][2].z,B3_u*matrix[1][3].z))),
- madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,matrix[2][1].z,madd(B2_u,matrix[2][2].z,B3_u*matrix[2][3].z))),
- B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u*matrix[3][3].z))))));
-
- return Vec3<T>(x,y,z);
- }
-
- template<typename vfloat>
- __forceinline Vec3<vfloat> eval(const vfloat& uu, const vfloat& vv) const {
- return eval(matrix,uu,vv);
- }
-
- template<class T>
- static __forceinline Vec3<T> normal(const Vertex matrix[4][4], const T& uu, const T& vv)
- {
-
- const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
- const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
- const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
- const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
-
- const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
- const Vec3<T> matrix_11 = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
- const Vec3<T> matrix_12 = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
- const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
-
- const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
- const Vec3<T> matrix_21 = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
- const Vec3<T> matrix_22 = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
- const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
-
- const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
- const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
- const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
- const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
-
- /* tangentU */
- const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
- const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
- const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
- const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
-
- const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
-
- /* tangentV */
- const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
- const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
- const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
- const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
-
- const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
-
- /* normal = tangentU x tangentV */
- const Vec3<T> n = cross(tangentU,tangentV);
- return n;
- }
-
- template<typename vfloat>
- __forceinline Vec3<vfloat> normal(const vfloat& uu, const vfloat& vv) const {
- return normal(matrix,uu,vv);
- }
- };
-
- typedef BezierPatchT<Vec3fa,Vec3fa_t> BezierPatch3fa;
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h
deleted file mode 100644
index 35748754bd..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_patch.h"
-#include "bezier_curve.h"
-
-namespace embree
-{
- template<typename Vertex, typename Vertex_t = Vertex>
- class __aligned(64) BilinearPatchT
- {
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
-
- public:
- Vertex v[4];
-
- public:
-
- __forceinline BilinearPatchT () {}
-
- __forceinline BilinearPatchT (const HalfEdge* edge, const BufferView<Vertex>& vertices) {
- init(edge,vertices.getPtr(),vertices.getStride());
- }
-
- __forceinline BilinearPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
- init(edge,vertices,stride);
- }
-
- __forceinline void init (const HalfEdge* edge, const char* vertices, size_t stride)
- {
- v[0] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
- v[1] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
- v[2] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
- v[3] = Vertex::loadu(vertices+edge->getStartVertexIndex()*stride); edge = edge->next();
- }
-
- __forceinline BilinearPatchT (const CatmullClarkPatch& patch)
- {
- v[0] = patch.ring[0].getLimitVertex();
- v[1] = patch.ring[1].getLimitVertex();
- v[2] = patch.ring[2].getLimitVertex();
- v[3] = patch.ring[3].getLimitVertex();
- }
-
- __forceinline BBox<Vertex> bounds() const
- {
-
- BBox<Vertex> bounds (v[0]);
- bounds.extend(v[1]);
- bounds.extend(v[2]);
- bounds.extend(v[3]);
- return bounds;
- }
-
- __forceinline Vertex eval(const float uu, const float vv) const {
- return lerp(lerp(v[0],v[1],uu),lerp(v[3],v[2],uu),vv);
- }
-
- __forceinline Vertex eval_du(const float uu, const float vv) const {
- return lerp(v[1]-v[0],v[2]-v[3],vv);
- }
-
- __forceinline Vertex eval_dv(const float uu, const float vv) const {
- return lerp(v[3]-v[0],v[2]-v[1],uu);
- }
-
- __forceinline Vertex eval_dudu(const float uu, const float vv) const {
- return Vertex(zero);
- }
-
- __forceinline Vertex eval_dvdv(const float uu, const float vv) const {
- return Vertex(zero);
- }
-
- __forceinline Vertex eval_dudv(const float uu, const float vv) const {
- return (v[2]-v[3]) - (v[1]-v[0]);
- }
-
- __forceinline Vertex normal(const float uu, const float vv) const {
- return cross(eval_du(uu,vv),eval_dv(uu,vv));
- }
-
- __forceinline void eval(const float u, const float v,
- Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
- const float dscale = 1.0f) const
- {
- if (P) {
- *P = eval(u,v);
- }
- if (dPdu) {
- assert(dPdu); *dPdu = eval_du(u,v)*dscale;
- assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
- }
- if (ddPdudu) {
- assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
- assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
- assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
- }
- }
-
- template<class vfloat>
- __forceinline Vec3<vfloat> eval(const vfloat& uu, const vfloat& vv) const
- {
- const vfloat x = lerp(lerp(v[0].x,v[1].x,uu),lerp(v[3].x,v[2].x,uu),vv);
- const vfloat y = lerp(lerp(v[0].y,v[1].y,uu),lerp(v[3].y,v[2].y,uu),vv);
- const vfloat z = lerp(lerp(v[0].z,v[1].z,uu),lerp(v[3].z,v[2].z,uu),vv);
- return Vec3<vfloat>(x,y,z);
- }
-
- template<class vfloat>
- __forceinline Vec3<vfloat> eval_du(const vfloat& uu, const vfloat& vv) const
- {
- const vfloat x = lerp(v[1].x-v[0].x,v[2].x-v[3].x,vv);
- const vfloat y = lerp(v[1].y-v[0].y,v[2].y-v[3].y,vv);
- const vfloat z = lerp(v[1].z-v[0].z,v[2].z-v[3].z,vv);
- return Vec3<vfloat>(x,y,z);
- }
-
- template<class vfloat>
- __forceinline Vec3<vfloat> eval_dv(const vfloat& uu, const vfloat& vv) const
- {
- const vfloat x = lerp(v[3].x-v[0].x,v[2].x-v[1].x,uu);
- const vfloat y = lerp(v[3].y-v[0].y,v[2].y-v[1].y,uu);
- const vfloat z = lerp(v[3].z-v[0].z,v[2].z-v[1].z,uu);
- return Vec3<vfloat>(x,y,z);
- }
-
- template<typename vfloat>
- __forceinline Vec3<vfloat> normal(const vfloat& uu, const vfloat& vv) const {
- return cross(eval_du(uu,vv),eval_dv(uu,vv));
- }
-
- template<class vfloat>
- __forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return lerp(lerp(v[0][i],v[1][i],uu),lerp(v[3][i],v[2][i],uu),vv);
- }
-
- template<class vfloat>
- __forceinline vfloat eval_du(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return lerp(v[1][i]-v[0][i],v[2][i]-v[3][i],vv);
- }
-
- template<class vfloat>
- __forceinline vfloat eval_dv(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return lerp(v[3][i]-v[0][i],v[2][i]-v[1][i],uu);
- }
-
- template<class vfloat>
- __forceinline vfloat eval_dudu(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return vfloat(zero);
- }
-
- template<class vfloat>
- __forceinline vfloat eval_dvdv(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return vfloat(zero);
- }
-
- template<class vfloat>
- __forceinline vfloat eval_dudv(const size_t i, const vfloat& uu, const vfloat& vv) const {
- return (v[2][i]-v[3][i]) - (v[1][i]-v[0][i]);
- }
-
- template<typename vbool, typename vfloat>
- __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
- const float dscale, const size_t dstride, const size_t N) const
- {
- if (P) {
- for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv));
- }
- if (dPdu) {
- for (size_t i=0; i<N; i++) {
- assert(dPdu); vfloat::store(valid,dPdu+i*dstride,eval_du(i,uu,vv)*dscale);
- assert(dPdv); vfloat::store(valid,dPdv+i*dstride,eval_dv(i,uu,vv)*dscale);
- }
- }
- if (ddPdudu) {
- for (size_t i=0; i<N; i++) {
- assert(ddPdudu); vfloat::store(valid,ddPdudu+i*dstride,eval_dudu(i,uu,vv)*sqr(dscale));
- assert(ddPdvdv); vfloat::store(valid,ddPdvdv+i*dstride,eval_dvdv(i,uu,vv)*sqr(dscale));
- assert(ddPdudv); vfloat::store(valid,ddPdudv+i*dstride,eval_dudv(i,uu,vv)*sqr(dscale));
- }
- }
- }
- };
-
- typedef BilinearPatchT<Vec3fa,Vec3fa_t> BilinearPatch3fa;
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h b/thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h
deleted file mode 100644
index a325667328..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "bezier_curve.h"
-
-namespace embree
-{
- class BSplineBasis
- {
- public:
-
- template<typename T>
- static __forceinline Vec4<T> eval(const T& u)
- {
- const T t = u;
- const T s = T(1.0f) - u;
- const T n0 = s*s*s;
- const T n1 = (4.0f*(s*s*s)+(t*t*t)) + (12.0f*((s*t)*s) + 6.0f*((t*s)*t));
- const T n2 = (4.0f*(t*t*t)+(s*s*s)) + (12.0f*((t*s)*t) + 6.0f*((s*t)*s));
- const T n3 = t*t*t;
- return T(1.0f/6.0f)*Vec4<T>(n0,n1,n2,n3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative(const T& u)
- {
- const T t = u;
- const T s = 1.0f - u;
- const T n0 = -s*s;
- const T n1 = -t*t - 4.0f*(t*s);
- const T n2 = s*s + 4.0f*(s*t);
- const T n3 = t*t;
- return T(0.5f)*Vec4<T>(n0,n1,n2,n3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative2(const T& u)
- {
- const T t = u;
- const T s = 1.0f - u;
- const T n0 = s;
- const T n1 = t - 2.0f*s;
- const T n2 = s - 2.0f*t;
- const T n3 = t;
- return Vec4<T>(n0,n1,n2,n3);
- }
- };
-
- struct PrecomputedBSplineBasis
- {
- enum { N = 16 };
- public:
- PrecomputedBSplineBasis() {}
- PrecomputedBSplineBasis(int shift);
-
- /* basis for bspline evaluation */
- public:
- float c0[N+1][N+1];
- float c1[N+1][N+1];
- float c2[N+1][N+1];
- float c3[N+1][N+1];
-
- /* basis for bspline derivative evaluation */
- public:
- float d0[N+1][N+1];
- float d1[N+1][N+1];
- float d2[N+1][N+1];
- float d3[N+1][N+1];
- };
- extern PrecomputedBSplineBasis bspline_basis0;
- extern PrecomputedBSplineBasis bspline_basis1;
-
- template<typename Vertex>
- struct BSplineCurveT
- {
- Vertex v0,v1,v2,v3;
-
- __forceinline BSplineCurveT() {}
-
- __forceinline BSplineCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
- : v0(v0), v1(v1), v2(v2), v3(v3) {}
-
- __forceinline Vertex begin() const {
- return madd(1.0f/6.0f,v0,madd(2.0f/3.0f,v1,1.0f/6.0f*v2));
- }
-
- __forceinline Vertex end() const {
- return madd(1.0f/6.0f,v1,madd(2.0f/3.0f,v2,1.0f/6.0f*v3));
- }
-
- __forceinline Vertex center() const {
- return 0.25f*(v0+v1+v2+v3);
- }
-
- __forceinline BBox<Vertex> bounds() const {
- return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
- }
-
- __forceinline friend BSplineCurveT operator -( const BSplineCurveT& a, const Vertex& b ) {
- return BSplineCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
- }
-
- __forceinline BSplineCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
- {
- const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
- const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
- const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
- const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
- return BSplineCurveT<Vec3ff>(q0,q1,q2,q3);
- }
-
- __forceinline Vertex eval(const float t) const
- {
- const Vec4<float> b = BSplineBasis::eval(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline Vertex eval_du(const float t) const
- {
- const Vec4<float> b = BSplineBasis::derivative(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline Vertex eval_dudu(const float t) const
- {
- const Vec4<float> b = BSplineBasis::derivative2(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
- {
- p = eval(t);
- dp = eval_du(t);
- ddp = eval_dudu(t);
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BSplineBasis::eval(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BSplineBasis::derivative(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = BSplineBasis::derivative2(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
- {
- p = veval(t);
- dp = veval_du(t);
- }
-
- template<int M>
- __forceinline Vec4vf<M> eval0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBSplineBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bspline_basis0.c0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&bspline_basis0.c1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&bspline_basis0.c2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&bspline_basis0.c3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> eval1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBSplineBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bspline_basis1.c0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&bspline_basis1.c1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&bspline_basis1.c2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&bspline_basis1.c3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> derivative0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBSplineBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bspline_basis0.d0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&bspline_basis0.d1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&bspline_basis0.d2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&bspline_basis0.d3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> derivative1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedBSplineBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&bspline_basis1.d0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&bspline_basis1.d1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&bspline_basis1.d2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&bspline_basis1.d3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- /* calculates bounds of bspline curve geometry */
- __forceinline BBox3fa accurateRoundBounds() const
- {
- const int N = 7;
- const float scale = 1.0f/(3.0f*(N-1));
- Vec4vfx pl(pos_inf), pu(neg_inf);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vintx vi = vintx(i)+vintx(step);
- vboolx valid = vi <= vintx(N);
- const Vec4vfx p = eval0<VSIZEX>(i,N);
- const Vec4vfx dp = derivative0<VSIZEX>(i,N);
- const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
- const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
- pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
- pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const float r_min = reduce_min(pl.w);
- const float r_max = reduce_max(pu.w);
- const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
- return enlarge(BBox3fa(lower,upper),upper_r);
- }
-
- /* calculates bounds when tessellated into N line segments */
- __forceinline BBox3fa accurateFlatBounds(int N) const
- {
- if (likely(N == 4))
- {
- const Vec4vf4 pi = eval0<4>(0,4);
- const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
- const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
- const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
- const Vec3ff pe = end();
- return enlarge(BBox3fa(min(lower,pe),max(upper,pe)),max(upper_r,Vec3fa(abs(pe.w))));
- }
- else
- {
- Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vboolx valid = vintx(i)+vintx(step) <= vintx(N);
- const Vec4vfx pi = eval0<VSIZEX>(i,N);
-
- pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
- pl.y = select(valid,min(pl.y,pi.y),pl.y);
- pl.z = select(valid,min(pl.z,pi.z),pl.z);
-
- pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
- pu.y = select(valid,max(pu.y,pi.y),pu.y);
- pu.z = select(valid,max(pu.z,pi.z),pu.z);
-
- ru = select(valid,max(ru,abs(pi.w)),ru);
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const Vec3fa upper_r(reduce_max(ru));
- return enlarge(BBox3fa(lower,upper),upper_r);
- }
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const BSplineCurveT& curve) {
- return cout << "BSplineCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
- }
- };
-
- template<typename Vertex>
- __forceinline void convert(const BezierCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve) {
- ocurve = icurve;
- }
-
- template<typename Vertex>
- __forceinline void convert(const BSplineCurveT<Vertex>& icurve, BSplineCurveT<Vertex>& ocurve) {
- ocurve = icurve;
- }
-
- template<typename Vertex>
- __forceinline void convert(const BezierCurveT<Vertex>& icurve, BSplineCurveT<Vertex>& ocurve)
- {
- const Vertex v0 = madd(6.0f,icurve.v0,madd(-7.0f,icurve.v1,2.0f*icurve.v2));
- const Vertex v1 = msub(2.0f,icurve.v1,icurve.v2);
- const Vertex v2 = msub(2.0f,icurve.v2,icurve.v1);
- const Vertex v3 = madd(2.0f,icurve.v1,madd(-7.0f,icurve.v2,6.0f*icurve.v3));
- ocurve = BSplineCurveT<Vertex>(v0,v1,v2,v3);
- }
-
- template<typename Vertex>
- __forceinline void convert(const BSplineCurveT<Vertex>& icurve, BezierCurveT<Vertex>& ocurve)
- {
- const Vertex v0 = madd(1.0f/6.0f,icurve.v0,madd(2.0f/3.0f,icurve.v1,1.0f/6.0f*icurve.v2));
- const Vertex v1 = madd(2.0f/3.0f,icurve.v1,1.0f/3.0f*icurve.v2);
- const Vertex v2 = madd(1.0f/3.0f,icurve.v1,2.0f/3.0f*icurve.v2);
- const Vertex v3 = madd(1.0f/6.0f,icurve.v1,madd(2.0f/3.0f,icurve.v2,1.0f/6.0f*icurve.v3));
- ocurve = BezierCurveT<Vertex>(v0,v1,v2,v3);
- }
-
- __forceinline BSplineCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const BSplineCurveT<Vec3ff>& curve)
- {
- return BSplineCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
- }
-
- typedef BSplineCurveT<Vec3fa> BSplineCurve3fa;
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h
deleted file mode 100644
index 9769bc17bd..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h
+++ /dev/null
@@ -1,449 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_patch.h"
-#include "bspline_curve.h"
-
-namespace embree
-{
- template<typename Vertex, typename Vertex_t = Vertex>
- class __aligned(64) BSplinePatchT
- {
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
-
- public:
-
- __forceinline BSplinePatchT () {}
-
- __forceinline BSplinePatchT (const CatmullClarkPatch& patch) {
- init(patch);
- }
-
- __forceinline BSplinePatchT(const CatmullClarkPatch& patch,
- const BezierCurveT<Vertex>* border0,
- const BezierCurveT<Vertex>* border1,
- const BezierCurveT<Vertex>* border2,
- const BezierCurveT<Vertex>* border3)
- {
- init(patch);
- }
-
- __forceinline BSplinePatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
- init(edge,vertices,stride);
- }
-
- __forceinline Vertex hard_corner(const Vertex& v01, const Vertex& v02,
- const Vertex& v10, const Vertex& v11, const Vertex& v12,
- const Vertex& v20, const Vertex& v21, const Vertex& v22)
- {
- return 4.0f*v11 - 2.0f*(v12+v21) + v22;
- }
-
- __forceinline Vertex soft_convex_corner( const Vertex& v01, const Vertex& v02,
- const Vertex& v10, const Vertex& v11, const Vertex& v12,
- const Vertex& v20, const Vertex& v21, const Vertex& v22)
- {
- return -8.0f*v11 + 4.0f*(v12+v21) + v22;
- }
-
- __forceinline Vertex convex_corner(const float vertex_crease_weight,
- const Vertex& v01, const Vertex& v02,
- const Vertex& v10, const Vertex& v11, const Vertex& v12,
- const Vertex& v20, const Vertex& v21, const Vertex& v22)
- {
- if (std::isinf(vertex_crease_weight)) return hard_corner(v01,v02,v10,v11,v12,v20,v21,v22);
- else return soft_convex_corner(v01,v02,v10,v11,v12,v20,v21,v22);
- }
-
- __forceinline Vertex load(const HalfEdge* edge, const char* vertices, size_t stride) {
- return Vertex_t::loadu(vertices+edge->getStartVertexIndex()*stride);
- }
-
- __forceinline void init_border(const CatmullClarkRing& edge0,
- Vertex& v01, Vertex& v02,
- const Vertex& v11, const Vertex& v12,
- const Vertex& v21, const Vertex& v22)
- {
- if (likely(edge0.has_opposite_back(0)))
- {
- v01 = edge0.back(2);
- v02 = edge0.back(1);
- } else {
- v01 = 2.0f*v11-v21;
- v02 = 2.0f*v12-v22;
- }
- }
-
- __forceinline void init_corner(const CatmullClarkRing& edge0,
- Vertex& v00, const Vertex& v01, const Vertex& v02,
- const Vertex& v10, const Vertex& v11, const Vertex& v12,
- const Vertex& v20, const Vertex& v21, const Vertex& v22)
- {
- const bool MAYBE_UNUSED has_back1 = edge0.has_opposite_back(1);
- const bool has_back0 = edge0.has_opposite_back(0);
- const bool has_front1 = edge0.has_opposite_front(1);
- const bool MAYBE_UNUSED has_front2 = edge0.has_opposite_front(2);
-
- if (likely(has_back0)) {
- if (likely(has_front1)) { assert(has_back1 && has_front2); v00 = edge0.back(3); }
- else { assert(!has_back1); v00 = 2.0f*v01-v02; }
- }
- else {
- if (likely(has_front1)) { assert(!has_front2); v00 = 2.0f*v10-v20; }
- else v00 = convex_corner(edge0.vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
- }
- }
-
- void init(const CatmullClarkPatch& patch)
- {
- /* fill inner vertices */
- const Vertex v11 = v[1][1] = patch.ring[0].vtx;
- const Vertex v12 = v[1][2] = patch.ring[1].vtx;
- const Vertex v22 = v[2][2] = patch.ring[2].vtx;
- const Vertex v21 = v[2][1] = patch.ring[3].vtx;
-
- /* fill border vertices */
- init_border(patch.ring[0],v[0][1],v[0][2],v11,v12,v21,v22);
- init_border(patch.ring[1],v[1][3],v[2][3],v12,v22,v11,v21);
- init_border(patch.ring[2],v[3][2],v[3][1],v22,v21,v12,v11);
- init_border(patch.ring[3],v[2][0],v[1][0],v21,v11,v22,v12);
-
- /* fill corner vertices */
- init_corner(patch.ring[0],v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
- init_corner(patch.ring[1],v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
- init_corner(patch.ring[2],v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
- init_corner(patch.ring[3],v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
- }
-
- void init_border(const HalfEdge* edge0, const char* vertices, size_t stride,
- Vertex& v01, Vertex& v02,
- const Vertex& v11, const Vertex& v12,
- const Vertex& v21, const Vertex& v22)
- {
- if (likely(edge0->hasOpposite()))
- {
- const HalfEdge* e = edge0->opposite()->next()->next();
- v01 = load(e,vertices,stride);
- v02 = load(e->next(),vertices,stride);
- } else {
- v01 = 2.0f*v11-v21;
- v02 = 2.0f*v12-v22;
- }
- }
-
- void init_corner(const HalfEdge* edge0, const char* vertices, size_t stride,
- Vertex& v00, const Vertex& v01, const Vertex& v02,
- const Vertex& v10, const Vertex& v11, const Vertex& v12,
- const Vertex& v20, const Vertex& v21, const Vertex& v22)
- {
- const bool has_back0 = edge0->hasOpposite();
- const bool has_front1 = edge0->prev()->hasOpposite();
-
- if (likely(has_back0))
- {
- const HalfEdge* e = edge0->opposite()->next();
- if (likely(has_front1))
- {
- assert(e->hasOpposite());
- assert(edge0->prev()->opposite()->prev()->hasOpposite());
- v00 = load(e->opposite()->prev(),vertices,stride);
- }
- else {
- assert(!e->hasOpposite());
- v00 = 2.0f*v01-v02;
- }
- }
- else
- {
- if (likely(has_front1)) {
- assert(!edge0->prev()->opposite()->prev()->hasOpposite());
- v00 = 2.0f*v10-v20;
- }
- else {
- assert(edge0->vertex_crease_weight == 0.0f || std::isinf(edge0->vertex_crease_weight));
- v00 = convex_corner(edge0->vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
- }
- }
- }
-
- void init(const HalfEdge* edge0, const char* vertices, size_t stride)
- {
- assert( edge0->isRegularFace() );
-
- /* fill inner vertices */
- const Vertex v11 = v[1][1] = load(edge0,vertices,stride); const HalfEdge* edge1 = edge0->next();
- const Vertex v12 = v[1][2] = load(edge1,vertices,stride); const HalfEdge* edge2 = edge1->next();
- const Vertex v22 = v[2][2] = load(edge2,vertices,stride); const HalfEdge* edge3 = edge2->next();
- const Vertex v21 = v[2][1] = load(edge3,vertices,stride); assert(edge0 == edge3->next());
-
- /* fill border vertices */
- init_border(edge0,vertices,stride,v[0][1],v[0][2],v11,v12,v21,v22);
- init_border(edge1,vertices,stride,v[1][3],v[2][3],v12,v22,v11,v21);
- init_border(edge2,vertices,stride,v[3][2],v[3][1],v22,v21,v12,v11);
- init_border(edge3,vertices,stride,v[2][0],v[1][0],v21,v11,v22,v12);
-
- /* fill corner vertices */
- init_corner(edge0,vertices,stride,v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
- init_corner(edge1,vertices,stride,v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
- init_corner(edge2,vertices,stride,v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
- init_corner(edge3,vertices,stride,v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
- }
-
- __forceinline BBox<Vertex> bounds() const
- {
- const Vertex* const cv = &v[0][0];
- BBox<Vertex> bounds (cv[0]);
- for (size_t i=1; i<16 ; i++)
- bounds.extend( cv[i] );
- return bounds;
- }
-
- __forceinline Vertex eval(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::eval(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::eval(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex eval_du(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::eval(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::derivative(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex eval_dv(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::derivative(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::eval(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex eval_dudu(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::eval(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::derivative2(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex eval_dvdv(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::derivative2(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::eval(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex eval_dudv(const float uu, const float vv) const
- {
- const Vec4f v_n = BSplineBasis::derivative(vv);
- const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
- const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
- const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
- const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
-
- const Vec4f u_n = BSplineBasis::derivative(uu);
- return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
- }
-
- __forceinline Vertex normal(const float uu, const float vv) const
- {
- const Vertex tu = eval_du(uu,vv);
- const Vertex tv = eval_dv(uu,vv);
- return cross(tu,tv);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval(const T& uu, const T& vv, const Vec4<T>& u_n, const Vec4<T>& v_n) const
- {
- const T curve0_x = madd(v_n[0],T(v[0][0].x),madd(v_n[1],T(v[1][0].x),madd(v_n[2],T(v[2][0].x),v_n[3] * T(v[3][0].x))));
- const T curve1_x = madd(v_n[0],T(v[0][1].x),madd(v_n[1],T(v[1][1].x),madd(v_n[2],T(v[2][1].x),v_n[3] * T(v[3][1].x))));
- const T curve2_x = madd(v_n[0],T(v[0][2].x),madd(v_n[1],T(v[1][2].x),madd(v_n[2],T(v[2][2].x),v_n[3] * T(v[3][2].x))));
- const T curve3_x = madd(v_n[0],T(v[0][3].x),madd(v_n[1],T(v[1][3].x),madd(v_n[2],T(v[2][3].x),v_n[3] * T(v[3][3].x))));
- const T x = madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
-
- const T curve0_y = madd(v_n[0],T(v[0][0].y),madd(v_n[1],T(v[1][0].y),madd(v_n[2],T(v[2][0].y),v_n[3] * T(v[3][0].y))));
- const T curve1_y = madd(v_n[0],T(v[0][1].y),madd(v_n[1],T(v[1][1].y),madd(v_n[2],T(v[2][1].y),v_n[3] * T(v[3][1].y))));
- const T curve2_y = madd(v_n[0],T(v[0][2].y),madd(v_n[1],T(v[1][2].y),madd(v_n[2],T(v[2][2].y),v_n[3] * T(v[3][2].y))));
- const T curve3_y = madd(v_n[0],T(v[0][3].y),madd(v_n[1],T(v[1][3].y),madd(v_n[2],T(v[2][3].y),v_n[3] * T(v[3][3].y))));
- const T y = madd(u_n[0],curve0_y,madd(u_n[1],curve1_y,madd(u_n[2],curve2_y,u_n[3] * curve3_y)));
-
- const T curve0_z = madd(v_n[0],T(v[0][0].z),madd(v_n[1],T(v[1][0].z),madd(v_n[2],T(v[2][0].z),v_n[3] * T(v[3][0].z))));
- const T curve1_z = madd(v_n[0],T(v[0][1].z),madd(v_n[1],T(v[1][1].z),madd(v_n[2],T(v[2][1].z),v_n[3] * T(v[3][1].z))));
- const T curve2_z = madd(v_n[0],T(v[0][2].z),madd(v_n[1],T(v[1][2].z),madd(v_n[2],T(v[2][2].z),v_n[3] * T(v[3][2].z))));
- const T curve3_z = madd(v_n[0],T(v[0][3].z),madd(v_n[1],T(v[1][3].z),madd(v_n[2],T(v[2][3].z),v_n[3] * T(v[3][3].z))));
- const T z = madd(u_n[0],curve0_z,madd(u_n[1],curve1_z,madd(u_n[2],curve2_z,u_n[3] * curve3_z)));
-
- return Vec3<T>(x,y,z);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::eval(uu);
- const Vec4<T> v_n = BSplineBasis::eval(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval_du(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::derivative(uu);
- const Vec4<T> v_n = BSplineBasis::eval(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval_dv(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::eval(uu);
- const Vec4<T> v_n = BSplineBasis::derivative(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval_dudu(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::derivative2(uu);
- const Vec4<T> v_n = BSplineBasis::eval(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval_dvdv(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::eval(uu);
- const Vec4<T> v_n = BSplineBasis::derivative2(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> eval_dudv(const T& uu, const T& vv) const
- {
- const Vec4<T> u_n = BSplineBasis::derivative(uu);
- const Vec4<T> v_n = BSplineBasis::derivative(vv);
- return eval(uu,vv,u_n,v_n);
- }
-
- template<typename T>
- __forceinline Vec3<T> normal(const T& uu, const T& vv) const {
- return cross(eval_du(uu,vv),eval_dv(uu,vv));
- }
-
- void eval(const float u, const float v,
- Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
- const float dscale = 1.0f) const
- {
- if (P) {
- *P = eval(u,v);
- }
- if (dPdu) {
- assert(dPdu); *dPdu = eval_du(u,v)*dscale;
- assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
- }
- if (ddPdudu) {
- assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
- assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
- assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
- }
- }
-
- template<class vfloat>
- __forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n) const
- {
- const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
- const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(v[1][1][i]),madd(v_n[2],vfloat(v[2][1][i]),v_n[3] * vfloat(v[3][1][i]))));
- const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(v[1][2][i]),madd(v_n[2],vfloat(v[2][2][i]),v_n[3] * vfloat(v[3][2][i]))));
- const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
- return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
- }
-
- template<typename vbool, typename vfloat>
- void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
- const float dscale, const size_t dstride, const size_t N) const
- {
- if (P) {
- const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
- const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv,u_n,v_n));
- }
- if (dPdu)
- {
- {
- assert(dPdu);
- const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
- const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,dPdu+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
- }
- {
- assert(dPdv);
- const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
- const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,dPdv+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
- }
- }
- if (ddPdudu)
- {
- {
- assert(ddPdudu);
- const Vec4<vfloat> u_n = BSplineBasis::derivative2(uu);
- const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudu+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- {
- assert(ddPdvdv);
- const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
- const Vec4<vfloat> v_n = BSplineBasis::derivative2(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdvdv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- {
- assert(ddPdudv);
- const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
- const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
- for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
- }
- }
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream o, const BSplinePatchT& p)
- {
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- o << "[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
- return o;
- }
-
- public:
- Vertex v[4][4];
- };
-
- typedef BSplinePatchT<Vec3fa,Vec3fa_t> BSplinePatch3fa;
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h b/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h
deleted file mode 100644
index 05031cf6b9..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/geometry.h"
-
-namespace embree
-{
- static const size_t MAX_PATCH_VALENCE = 16; //!< maximum number of vertices of a patch
- static const size_t MAX_RING_FACE_VALENCE = 64; //!< maximum number of faces per ring
- static const size_t MAX_RING_EDGE_VALENCE = 2*64; //!< maximum number of edges per ring
-
- class CatmullClarkPrecomputedCoefficients
- {
- private:
-
- float table_cos_2PI_div_n[MAX_RING_FACE_VALENCE+1];
-
- float* table_limittangent_a[MAX_RING_FACE_VALENCE+1];
- float* table_limittangent_b[MAX_RING_FACE_VALENCE+1];
- float table_limittangent_c[MAX_RING_FACE_VALENCE+1];
-
- __forceinline float set_cos_2PI_div_n(const size_t n) {
- if (unlikely(n == 0)) return 1.0f;
- return cosf(2.0f*float(pi)/(float)n);
- }
-
- __forceinline float set_limittangent_a(const size_t i, const size_t n)
- {
- if (unlikely(n == 0)) return 1.0f;
- const float c0 = 1.0f/(float)n * 1.0f / sqrtf(4.0f + cosf(float(pi)/(float)n)*cosf(float(pi)/(float)n));
- const float c1 = (1.0f/(float)n + cosf(float(pi)/(float)n) * c0);
- return cosf(2.0f*float(pi)*(float)i/(float)n) * c1;
- }
-
- __forceinline float set_limittangent_b(const size_t i, const size_t n)
- {
- if (unlikely(n == 0)) return 1.0f;
- const float c0 = 1.0f/(float)n * 1.0f / sqrtf(4.0f + cosf(float(pi)/(float)n)*cosf(float(pi)/(float)n));
- return cosf((2.0f*float(pi)*i+float(pi))/(float)n) * c0;
- }
-
- __forceinline float set_limittangent_c(const size_t n)
- {
- if (unlikely(n == 0)) return 1.0f;
- return 2.0f/16.0f * (5.0f + cosf(2.0f*float(pi)/(float)n) + cosf(float(pi)/(float)n) * sqrtf(18.0f+2.0f*cosf(2.0f*float(pi)/(float)n)));
- }
-
- public:
-
- __forceinline float cos_2PI_div_n(const size_t n)
- {
- if (likely(n <= MAX_RING_FACE_VALENCE))
- return table_cos_2PI_div_n[n];
- else
- return set_cos_2PI_div_n(n);
- }
-
- __forceinline float limittangent_a(const size_t i, const size_t n)
- {
- assert(n <= MAX_RING_FACE_VALENCE);
- assert(i < n);
- return table_limittangent_a[n][i];
- }
-
- __forceinline float limittangent_b(const size_t i, const size_t n)
- {
- assert(n <= MAX_RING_FACE_VALENCE);
- assert(i < n);
- return table_limittangent_b[n][i];
- }
-
- __forceinline float limittangent_c(const size_t n)
- {
- assert(n <= MAX_RING_FACE_VALENCE);
- return table_limittangent_c[n];
- }
-
- static CatmullClarkPrecomputedCoefficients table;
-
- CatmullClarkPrecomputedCoefficients();
- ~CatmullClarkPrecomputedCoefficients();
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h
deleted file mode 100644
index ab1d63594a..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h
+++ /dev/null
@@ -1,562 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_ring.h"
-#include "bezier_curve.h"
-
-namespace embree
-{
- template<typename Vertex, typename Vertex_t = Vertex>
- class __aligned(64) CatmullClarkPatchT
- {
- public:
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
- typedef typename CatmullClark1Ring::Type Type;
-
- array_t<CatmullClark1RingT<Vertex,Vertex_t>,4> ring;
-
- public:
- __forceinline CatmullClarkPatchT () {}
-
- __forceinline CatmullClarkPatchT (const HalfEdge* first_half_edge, const char* vertices, size_t stride) {
- init(first_half_edge,vertices,stride);
- }
-
- __forceinline CatmullClarkPatchT (const HalfEdge* first_half_edge, const BufferView<Vec3fa>& vertices) {
- init(first_half_edge,vertices.getPtr(),vertices.getStride());
- }
-
- __forceinline void init (const HalfEdge* first_half_edge, const char* vertices, size_t stride)
- {
- for (unsigned i=0; i<4; i++)
- ring[i].init(first_half_edge+i,vertices,stride);
-
- assert(verify());
- }
-
- __forceinline size_t bytes() const {
- return ring[0].bytes()+ring[1].bytes()+ring[2].bytes()+ring[3].bytes();
- }
-
- __forceinline void serialize(void* ptr, size_t& ofs) const
- {
- for (size_t i=0; i<4; i++)
- ring[i].serialize((char*)ptr,ofs);
- }
-
- __forceinline void deserialize(void* ptr)
- {
- size_t ofs = 0;
- for (size_t i=0; i<4; i++)
- ring[i].deserialize((char*)ptr,ofs);
- }
-
- __forceinline BBox3fa bounds() const
- {
- BBox3fa bounds (ring[0].bounds());
- for (size_t i=1; i<4; i++)
- bounds.extend(ring[i].bounds());
- return bounds;
- }
-
- __forceinline Type type() const
- {
- const int ty0 = ring[0].type() ^ CatmullClark1Ring::TYPE_CREASES;
- const int ty1 = ring[1].type() ^ CatmullClark1Ring::TYPE_CREASES;
- const int ty2 = ring[2].type() ^ CatmullClark1Ring::TYPE_CREASES;
- const int ty3 = ring[3].type() ^ CatmullClark1Ring::TYPE_CREASES;
- return (Type) ((ty0 & ty1 & ty2 & ty3) ^ CatmullClark1Ring::TYPE_CREASES);
- }
-
- __forceinline bool isFinalResolution(float res) const {
- return ring[0].isFinalResolution(res) && ring[1].isFinalResolution(res) && ring[2].isFinalResolution(res) && ring[3].isFinalResolution(res);
- }
-
- static __forceinline void init_regular(const CatmullClark1RingT<Vertex,Vertex_t>& p0,
- const CatmullClark1RingT<Vertex,Vertex_t>& p1,
- CatmullClark1RingT<Vertex,Vertex_t>& dest0,
- CatmullClark1RingT<Vertex,Vertex_t>& dest1)
- {
- assert(p1.face_valence > 2);
- dest1.vertex_level = dest0.vertex_level = p0.edge_level;
- dest1.face_valence = dest0.face_valence = 4;
- dest1.edge_valence = dest0.edge_valence = 8;
- dest1.border_index = dest0.border_index = -1;
- dest1.vtx = dest0.vtx = (Vertex_t)p0.ring[0];
- dest1.vertex_crease_weight = dest0.vertex_crease_weight = 0.0f;
-
- dest1.ring[2] = dest0.ring[0] = (Vertex_t)p0.ring[1];
- dest1.ring[1] = dest0.ring[7] = (Vertex_t)p1.ring[0];
- dest1.ring[0] = dest0.ring[6] = (Vertex_t)p1.vtx;
- dest1.ring[7] = dest0.ring[5] = (Vertex_t)p1.ring[4];
- dest1.ring[6] = dest0.ring[4] = (Vertex_t)p0.ring[p0.edge_valence-1];
- dest1.ring[5] = dest0.ring[3] = (Vertex_t)p0.ring[p0.edge_valence-2];
- dest1.ring[4] = dest0.ring[2] = (Vertex_t)p0.vtx;
- dest1.ring[3] = dest0.ring[1] = (Vertex_t)p0.ring[2];
-
- dest1.crease_weight[1] = dest0.crease_weight[0] = 0.0f;
- dest1.crease_weight[0] = dest0.crease_weight[3] = p1.crease_weight[1];
- dest1.crease_weight[3] = dest0.crease_weight[2] = 0.0f;
- dest1.crease_weight[2] = dest0.crease_weight[1] = p0.crease_weight[0];
-
- if (p0.eval_unique_identifier <= p1.eval_unique_identifier)
- {
- dest0.eval_start_index = 3;
- dest1.eval_start_index = 0;
- dest0.eval_unique_identifier = p0.eval_unique_identifier;
- dest1.eval_unique_identifier = p0.eval_unique_identifier;
- }
- else
- {
- dest0.eval_start_index = 1;
- dest1.eval_start_index = 2;
- dest0.eval_unique_identifier = p1.eval_unique_identifier;
- dest1.eval_unique_identifier = p1.eval_unique_identifier;
- }
- }
-
- static __forceinline void init_border(const CatmullClark1RingT<Vertex,Vertex_t> &p0,
- const CatmullClark1RingT<Vertex,Vertex_t> &p1,
- CatmullClark1RingT<Vertex,Vertex_t> &dest0,
- CatmullClark1RingT<Vertex,Vertex_t> &dest1)
- {
- dest1.vertex_level = dest0.vertex_level = p0.edge_level;
- dest1.face_valence = dest0.face_valence = 3;
- dest1.edge_valence = dest0.edge_valence = 6;
- dest0.border_index = 2;
- dest1.border_index = 4;
- dest1.vtx = dest0.vtx = (Vertex_t)p0.ring[0];
- dest1.vertex_crease_weight = dest0.vertex_crease_weight = 0.0f;
-
- dest1.ring[2] = dest0.ring[0] = (Vertex_t)p0.ring[1];
- dest1.ring[1] = dest0.ring[5] = (Vertex_t)p1.ring[0];
- dest1.ring[0] = dest0.ring[4] = (Vertex_t)p1.vtx;
- dest1.ring[5] = dest0.ring[3] = (Vertex_t)p0.ring[p0.border_index+1]; // dummy
- dest1.ring[4] = dest0.ring[2] = (Vertex_t)p0.vtx;
- dest1.ring[3] = dest0.ring[1] = (Vertex_t)p0.ring[2];
-
- dest1.crease_weight[1] = dest0.crease_weight[0] = 0.0f;
- dest1.crease_weight[0] = dest0.crease_weight[2] = p1.crease_weight[1];
- dest1.crease_weight[2] = dest0.crease_weight[1] = p0.crease_weight[0];
-
- if (p0.eval_unique_identifier <= p1.eval_unique_identifier)
- {
- dest0.eval_start_index = 1;
- dest1.eval_start_index = 2;
- dest0.eval_unique_identifier = p0.eval_unique_identifier;
- dest1.eval_unique_identifier = p0.eval_unique_identifier;
- }
- else
- {
- dest0.eval_start_index = 2;
- dest1.eval_start_index = 0;
- dest0.eval_unique_identifier = p1.eval_unique_identifier;
- dest1.eval_unique_identifier = p1.eval_unique_identifier;
- }
- }
-
- static __forceinline void init_regular(const Vertex_t &center, const Vertex_t center_ring[8], const unsigned int offset, CatmullClark1RingT<Vertex,Vertex_t> &dest)
- {
- dest.vertex_level = 0.0f;
- dest.face_valence = 4;
- dest.edge_valence = 8;
- dest.border_index = -1;
- dest.vtx = (Vertex_t)center;
- dest.vertex_crease_weight = 0.0f;
- for (size_t i=0; i<8; i++)
- dest.ring[i] = (Vertex_t)center_ring[(offset+i)%8];
- for (size_t i=0; i<4; i++)
- dest.crease_weight[i] = 0.0f;
-
- dest.eval_start_index = (8-offset)>>1;
- if (dest.eval_start_index >= dest.face_valence) dest.eval_start_index -= dest.face_valence;
- assert( dest.eval_start_index < dest.face_valence );
- dest.eval_unique_identifier = 0;
- }
-
- __noinline void subdivide(array_t<CatmullClarkPatchT,4>& patch) const
- {
- ring[0].subdivide(patch[0].ring[0]);
- ring[1].subdivide(patch[1].ring[1]);
- ring[2].subdivide(patch[2].ring[2]);
- ring[3].subdivide(patch[3].ring[3]);
-
- patch[0].ring[0].edge_level = 0.5f*ring[0].edge_level;
- patch[0].ring[1].edge_level = 0.25f*(ring[1].edge_level+ring[3].edge_level);
- patch[0].ring[2].edge_level = 0.25f*(ring[0].edge_level+ring[2].edge_level);
- patch[0].ring[3].edge_level = 0.5f*ring[3].edge_level;
-
- patch[1].ring[0].edge_level = 0.5f*ring[0].edge_level;
- patch[1].ring[1].edge_level = 0.5f*ring[1].edge_level;
- patch[1].ring[2].edge_level = 0.25f*(ring[0].edge_level+ring[2].edge_level);
- patch[1].ring[3].edge_level = 0.25f*(ring[1].edge_level+ring[3].edge_level);
-
- patch[2].ring[0].edge_level = 0.25f*(ring[0].edge_level+ring[2].edge_level);
- patch[2].ring[1].edge_level = 0.5f*ring[1].edge_level;
- patch[2].ring[2].edge_level = 0.5f*ring[2].edge_level;
- patch[2].ring[3].edge_level = 0.25f*(ring[1].edge_level+ring[3].edge_level);
-
- patch[3].ring[0].edge_level = 0.25f*(ring[0].edge_level+ring[2].edge_level);
- patch[3].ring[1].edge_level = 0.25f*(ring[1].edge_level+ring[3].edge_level);
- patch[3].ring[2].edge_level = 0.5f*ring[2].edge_level;
- patch[3].ring[3].edge_level = 0.5f*ring[3].edge_level;
-
- const bool regular0 = ring[0].has_last_face() && ring[1].face_valence > 2;
- if (likely(regular0))
- init_regular(patch[0].ring[0],patch[1].ring[1],patch[0].ring[1],patch[1].ring[0]);
- else
- init_border(patch[0].ring[0],patch[1].ring[1],patch[0].ring[1],patch[1].ring[0]);
-
- const bool regular1 = ring[1].has_last_face() && ring[2].face_valence > 2;
- if (likely(regular1))
- init_regular(patch[1].ring[1],patch[2].ring[2],patch[1].ring[2],patch[2].ring[1]);
- else
- init_border(patch[1].ring[1],patch[2].ring[2],patch[1].ring[2],patch[2].ring[1]);
-
- const bool regular2 = ring[2].has_last_face() && ring[3].face_valence > 2;
- if (likely(regular2))
- init_regular(patch[2].ring[2],patch[3].ring[3],patch[2].ring[3],patch[3].ring[2]);
- else
- init_border(patch[2].ring[2],patch[3].ring[3],patch[2].ring[3],patch[3].ring[2]);
-
- const bool regular3 = ring[3].has_last_face() && ring[0].face_valence > 2;
- if (likely(regular3))
- init_regular(patch[3].ring[3],patch[0].ring[0],patch[3].ring[0],patch[0].ring[3]);
- else
- init_border(patch[3].ring[3],patch[0].ring[0],patch[3].ring[0],patch[0].ring[3]);
-
- Vertex_t center = (ring[0].vtx + ring[1].vtx + ring[2].vtx + ring[3].vtx) * 0.25f;
-
- Vertex_t center_ring[8];
- center_ring[0] = (Vertex_t)patch[3].ring[3].ring[0];
- center_ring[7] = (Vertex_t)patch[3].ring[3].vtx;
- center_ring[6] = (Vertex_t)patch[2].ring[2].ring[0];
- center_ring[5] = (Vertex_t)patch[2].ring[2].vtx;
- center_ring[4] = (Vertex_t)patch[1].ring[1].ring[0];
- center_ring[3] = (Vertex_t)patch[1].ring[1].vtx;
- center_ring[2] = (Vertex_t)patch[0].ring[0].ring[0];
- center_ring[1] = (Vertex_t)patch[0].ring[0].vtx;
-
- init_regular(center,center_ring,0,patch[0].ring[2]);
- init_regular(center,center_ring,2,patch[1].ring[3]);
- init_regular(center,center_ring,4,patch[2].ring[0]);
- init_regular(center,center_ring,6,patch[3].ring[1]);
-
- assert(patch[0].verify());
- assert(patch[1].verify());
- assert(patch[2].verify());
- assert(patch[3].verify());
- }
-
- bool verify() const {
- return ring[0].hasValidPositions() && ring[1].hasValidPositions() && ring[2].hasValidPositions() && ring[3].hasValidPositions();
- }
-
- __forceinline void init( FinalQuad& quad ) const
- {
- quad.vtx[0] = (Vertex_t)ring[0].vtx;
- quad.vtx[1] = (Vertex_t)ring[1].vtx;
- quad.vtx[2] = (Vertex_t)ring[2].vtx;
- quad.vtx[3] = (Vertex_t)ring[3].vtx;
- };
-
- friend __forceinline embree_ostream operator<<(embree_ostream o, const CatmullClarkPatchT &p)
- {
- o << "CatmullClarkPatch { " << embree_endl;
- for (size_t i=0; i<4; i++)
- o << "ring" << i << ": " << p.ring[i] << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
- };
-
- typedef CatmullClarkPatchT<Vec3fa,Vec3fa_t> CatmullClarkPatch3fa;
-
- template<typename Vertex, typename Vertex_t = Vertex>
- class __aligned(64) GeneralCatmullClarkPatchT
- {
- public:
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
- typedef BezierCurveT<Vertex> BezierCurve;
-
- static const unsigned SIZE = MAX_PATCH_VALENCE;
- DynamicStackArray<GeneralCatmullClark1RingT<Vertex,Vertex_t>,8,SIZE> ring;
- unsigned N;
-
- __forceinline GeneralCatmullClarkPatchT ()
- : N(0) {}
-
- GeneralCatmullClarkPatchT (const HalfEdge* h, const char* vertices, size_t stride) {
- init(h,vertices,stride);
- }
-
- __forceinline GeneralCatmullClarkPatchT (const HalfEdge* first_half_edge, const BufferView<Vec3fa>& vertices) {
- init(first_half_edge,vertices.getPtr(),vertices.getStride());
- }
-
- __forceinline void init (const HalfEdge* h, const char* vertices, size_t stride)
- {
- unsigned int i = 0;
- const HalfEdge* edge = h;
- do {
- ring[i].init(edge,vertices,stride);
- edge = edge->next();
- i++;
- } while ((edge != h) && (i < SIZE));
- N = i;
- }
-
- __forceinline unsigned size() const {
- return N;
- }
-
- __forceinline bool isQuadPatch() const {
- return (N == 4) && ring[0].only_quads && ring[1].only_quads && ring[2].only_quads && ring[3].only_quads;
- }
-
- static __forceinline void init_regular(const CatmullClark1RingT<Vertex,Vertex_t>& p0,
- const CatmullClark1RingT<Vertex,Vertex_t>& p1,
- CatmullClark1RingT<Vertex,Vertex_t>& dest0,
- CatmullClark1RingT<Vertex,Vertex_t>& dest1)
- {
- assert(p1.face_valence > 2);
- dest1.vertex_level = dest0.vertex_level = p0.edge_level;
- dest1.face_valence = dest0.face_valence = 4;
- dest1.edge_valence = dest0.edge_valence = 8;
- dest1.border_index = dest0.border_index = -1;
- dest1.vtx = dest0.vtx = (Vertex_t)p0.ring[0];
- dest1.vertex_crease_weight = dest0.vertex_crease_weight = 0.0f;
-
- dest1.ring[2] = dest0.ring[0] = (Vertex_t)p0.ring[1];
- dest1.ring[1] = dest0.ring[7] = (Vertex_t)p1.ring[0];
- dest1.ring[0] = dest0.ring[6] = (Vertex_t)p1.vtx;
- dest1.ring[7] = dest0.ring[5] = (Vertex_t)p1.ring[4];
- dest1.ring[6] = dest0.ring[4] = (Vertex_t)p0.ring[p0.edge_valence-1];
- dest1.ring[5] = dest0.ring[3] = (Vertex_t)p0.ring[p0.edge_valence-2];
- dest1.ring[4] = dest0.ring[2] = (Vertex_t)p0.vtx;
- dest1.ring[3] = dest0.ring[1] = (Vertex_t)p0.ring[2];
-
- dest1.crease_weight[1] = dest0.crease_weight[0] = 0.0f;
- dest1.crease_weight[0] = dest0.crease_weight[3] = p1.crease_weight[1];
- dest1.crease_weight[3] = dest0.crease_weight[2] = 0.0f;
- dest1.crease_weight[2] = dest0.crease_weight[1] = p0.crease_weight[0];
-
- if (p0.eval_unique_identifier <= p1.eval_unique_identifier)
- {
- dest0.eval_start_index = 3;
- dest1.eval_start_index = 0;
- dest0.eval_unique_identifier = p0.eval_unique_identifier;
- dest1.eval_unique_identifier = p0.eval_unique_identifier;
- }
- else
- {
- dest0.eval_start_index = 1;
- dest1.eval_start_index = 2;
- dest0.eval_unique_identifier = p1.eval_unique_identifier;
- dest1.eval_unique_identifier = p1.eval_unique_identifier;
- }
- }
-
-
- static __forceinline void init_border(const CatmullClark1RingT<Vertex,Vertex_t> &p0,
- const CatmullClark1RingT<Vertex,Vertex_t> &p1,
- CatmullClark1RingT<Vertex,Vertex_t> &dest0,
- CatmullClark1RingT<Vertex,Vertex_t> &dest1)
- {
- dest1.vertex_level = dest0.vertex_level = p0.edge_level;
- dest1.face_valence = dest0.face_valence = 3;
- dest1.edge_valence = dest0.edge_valence = 6;
- dest0.border_index = 2;
- dest1.border_index = 4;
- dest1.vtx = dest0.vtx = (Vertex_t)p0.ring[0];
- dest1.vertex_crease_weight = dest0.vertex_crease_weight = 0.0f;
-
- dest1.ring[2] = dest0.ring[0] = (Vertex_t)p0.ring[1];
- dest1.ring[1] = dest0.ring[5] = (Vertex_t)p1.ring[0];
- dest1.ring[0] = dest0.ring[4] = (Vertex_t)p1.vtx;
- dest1.ring[5] = dest0.ring[3] = (Vertex_t)p0.ring[p0.border_index+1]; // dummy
- dest1.ring[4] = dest0.ring[2] = (Vertex_t)p0.vtx;
- dest1.ring[3] = dest0.ring[1] = (Vertex_t)p0.ring[2];
-
- dest1.crease_weight[1] = dest0.crease_weight[0] = 0.0f;
- dest1.crease_weight[0] = dest0.crease_weight[2] = p1.crease_weight[1];
- dest1.crease_weight[2] = dest0.crease_weight[1] = p0.crease_weight[0];
-
- if (p0.eval_unique_identifier <= p1.eval_unique_identifier)
- {
- dest0.eval_start_index = 1;
- dest1.eval_start_index = 2;
- dest0.eval_unique_identifier = p0.eval_unique_identifier;
- dest1.eval_unique_identifier = p0.eval_unique_identifier;
- }
- else
- {
- dest0.eval_start_index = 2;
- dest1.eval_start_index = 0;
- dest0.eval_unique_identifier = p1.eval_unique_identifier;
- dest1.eval_unique_identifier = p1.eval_unique_identifier;
- }
- }
-
- static __forceinline void init_regular(const Vertex_t &center, const array_t<Vertex_t,2*SIZE>& center_ring, const float vertex_level, const unsigned int N, const unsigned int offset, CatmullClark1RingT<Vertex,Vertex_t> &dest)
- {
- assert(N<(MAX_RING_FACE_VALENCE));
- assert(2*N<(MAX_RING_EDGE_VALENCE));
- dest.vertex_level = vertex_level;
- dest.face_valence = N;
- dest.edge_valence = 2*N;
- dest.border_index = -1;
- dest.vtx = (Vertex_t)center;
- dest.vertex_crease_weight = 0.0f;
- for (unsigned i=0; i<2*N; i++) {
- dest.ring[i] = (Vertex_t)center_ring[(2*N+offset+i-1)%(2*N)];
- assert(isvalid(dest.ring[i]));
- }
- for (unsigned i=0; i<N; i++)
- dest.crease_weight[i] = 0.0f;
-
- assert(offset <= 2*N);
- dest.eval_start_index = (2*N-offset)>>1;
- if (dest.eval_start_index >= dest.face_valence) dest.eval_start_index -= dest.face_valence;
-
- assert( dest.eval_start_index < dest.face_valence );
- dest.eval_unique_identifier = 0;
- }
-
- __noinline void subdivide(array_t<CatmullClarkPatch,SIZE>& patch, unsigned& N_o) const
- {
- N_o = N;
- assert( N );
- for (unsigned i=0; i<N; i++) {
- unsigned ip1 = (i+1)%N; // FIXME: %
- ring[i].subdivide(patch[i].ring[0]);
- patch[i] .ring[0].edge_level = 0.5f*ring[i].edge_level;
- patch[ip1].ring[3].edge_level = 0.5f*ring[i].edge_level;
-
- assert( patch[i].ring[0].hasValidPositions() );
-
- }
- assert(N < 2*SIZE);
- Vertex_t center = Vertex_t(0.0f);
- array_t<Vertex_t,2*SIZE> center_ring;
- float center_vertex_level = 2.0f; // guarantees that irregular vertices get always isolated also for non-quads
-
- for (unsigned i=0; i<N; i++)
- {
- unsigned ip1 = (i+1)%N; // FIXME: %
- unsigned im1 = (i+N-1)%N; // FIXME: %
- bool regular = ring[i].has_last_face() && ring[ip1].face_valence > 2;
- if (likely(regular)) init_regular(patch[i].ring[0],patch[ip1].ring[0],patch[i].ring[1],patch[ip1].ring[3]);
- else init_border (patch[i].ring[0],patch[ip1].ring[0],patch[i].ring[1],patch[ip1].ring[3]);
-
- assert( patch[i].ring[1].hasValidPositions() );
- assert( patch[ip1].ring[3].hasValidPositions() );
-
- float level = 0.25f*(ring[im1].edge_level+ring[ip1].edge_level);
- patch[i].ring[1].edge_level = patch[ip1].ring[2].edge_level = level;
- center_vertex_level = max(center_vertex_level,level);
-
- center += ring[i].vtx;
- center_ring[2*i+0] = (Vertex_t)patch[i].ring[0].vtx;
- center_ring[2*i+1] = (Vertex_t)patch[i].ring[0].ring[0];
- }
- center /= float(N);
-
- for (unsigned int i=0; i<N; i++) {
- init_regular(center,center_ring,center_vertex_level,N,2*i,patch[i].ring[2]);
-
- assert( patch[i].ring[2].hasValidPositions() );
- }
- }
-
- void init(CatmullClarkPatch& patch) const
- {
- assert(size() == 4);
- ring[0].convert(patch.ring[0]);
- ring[1].convert(patch.ring[1]);
- ring[2].convert(patch.ring[2]);
- ring[3].convert(patch.ring[3]);
- }
-
- static void fix_quad_ring_order (array_t<CatmullClarkPatch,GeneralCatmullClarkPatchT::SIZE>& patches)
- {
- CatmullClark1Ring patches1ring1 = patches[1].ring[1];
- patches[1].ring[1] = patches[1].ring[0]; // FIXME: optimize these assignments
- patches[1].ring[0] = patches[1].ring[3];
- patches[1].ring[3] = patches[1].ring[2];
- patches[1].ring[2] = patches1ring1;
-
- CatmullClark1Ring patches2ring2 = patches[2].ring[2];
- patches[2].ring[2] = patches[2].ring[0];
- patches[2].ring[0] = patches2ring2;
- CatmullClark1Ring patches2ring3 = patches[2].ring[3];
- patches[2].ring[3] = patches[2].ring[1];
- patches[2].ring[1] = patches2ring3;
-
- CatmullClark1Ring patches3ring3 = patches[3].ring[3];
- patches[3].ring[3] = patches[3].ring[0];
- patches[3].ring[0] = patches[3].ring[1];
- patches[3].ring[1] = patches[3].ring[2];
- patches[3].ring[2] = patches3ring3;
- }
-
- __forceinline void getLimitBorder(BezierCurve curves[GeneralCatmullClarkPatchT::SIZE]) const
- {
- Vertex P0 = ring[0].getLimitVertex();
- for (unsigned i=0; i<N; i++)
- {
- const unsigned i0 = i, i1 = i+1==N ? 0 : i+1;
- const Vertex P1 = madd(1.0f/3.0f,ring[i0].getLimitTangent(),P0);
- const Vertex P3 = ring[i1].getLimitVertex();
- const Vertex P2 = madd(1.0f/3.0f,ring[i1].getSecondLimitTangent(),P3);
- new (&curves[i]) BezierCurve(P0,P1,P2,P3);
- P0 = P3;
- }
- }
-
- __forceinline void getLimitBorder(BezierCurve curves[2], const unsigned subPatch) const
- {
- const unsigned i0 = subPatch;
- const Vertex t0_p = ring[i0].getLimitTangent();
- const Vertex t0_m = ring[i0].getSecondLimitTangent();
-
- const unsigned i1 = subPatch+1 == N ? 0 : subPatch+1;
- const Vertex t1_p = ring[i1].getLimitTangent();
- const Vertex t1_m = ring[i1].getSecondLimitTangent();
-
- const unsigned i2 = subPatch == 0 ? N-1 : subPatch-1;
- const Vertex t2_p = ring[i2].getLimitTangent();
- const Vertex t2_m = ring[i2].getSecondLimitTangent();
-
- const Vertex b00 = ring[i0].getLimitVertex();
- const Vertex b03 = ring[i1].getLimitVertex();
- const Vertex b33 = ring[i2].getLimitVertex();
-
- const Vertex b01 = madd(1.0/3.0f,t0_p,b00);
- const Vertex b11 = madd(1.0/3.0f,t0_m,b00);
-
- //const Vertex b13 = madd(1.0/3.0f,t1_p,b03);
- const Vertex b02 = madd(1.0/3.0f,t1_m,b03);
-
- const Vertex b22 = madd(1.0/3.0f,t2_p,b33);
- const Vertex b23 = madd(1.0/3.0f,t2_m,b33);
-
- new (&curves[0]) BezierCurve(b00,b01,b02,b03);
- new (&curves[1]) BezierCurve(b33,b22,b11,b00);
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream o, const GeneralCatmullClarkPatchT &p)
- {
- o << "GeneralCatmullClarkPatch { " << embree_endl;
- for (unsigned i=0; i<p.N; i++)
- o << "ring" << i << ": " << p.ring[i] << embree_endl;
- o << "}" << embree_endl;
- return o;
- }
- };
-
- typedef GeneralCatmullClarkPatchT<Vec3fa,Vec3fa_t> GeneralCatmullClarkPatch3fa;
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h b/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h
deleted file mode 100644
index 73b41fd4ff..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h
+++ /dev/null
@@ -1,826 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/geometry.h"
-#include "../common/buffer.h"
-#include "half_edge.h"
-#include "catmullclark_coefficients.h"
-
-namespace embree
-{
- struct __aligned(64) FinalQuad {
- Vec3fa vtx[4];
- };
-
- template<typename Vertex, typename Vertex_t = Vertex>
- struct __aligned(64) CatmullClark1RingT
- {
- ALIGNED_STRUCT_(64);
-
- int border_index; //!< edge index where border starts
- unsigned int face_valence; //!< number of adjacent quad faces
- unsigned int edge_valence; //!< number of adjacent edges (2*face_valence)
- float vertex_crease_weight; //!< weight of vertex crease (0 if no vertex crease)
- DynamicStackArray<float,16,MAX_RING_FACE_VALENCE> crease_weight; //!< edge crease weights for each adjacent edge
- float vertex_level; //!< maximum level of all adjacent edges
- float edge_level; //!< level of first edge
- unsigned int eval_start_index; //!< topology dependent index to start evaluation
- unsigned int eval_unique_identifier; //!< topology dependent unique identifier for this ring
- Vertex vtx; //!< center vertex
- DynamicStackArray<Vertex,32,MAX_RING_EDGE_VALENCE> ring; //!< ring of neighboring vertices
-
- public:
- CatmullClark1RingT ()
- : eval_start_index(0), eval_unique_identifier(0) {} // FIXME: default constructor should be empty
-
- /*! calculates number of bytes required to serialize this structure */
- __forceinline size_t bytes() const
- {
- size_t ofs = 0;
- ofs += sizeof(border_index);
- ofs += sizeof(face_valence);
- assert(2*face_valence == edge_valence);
- ofs += sizeof(vertex_crease_weight);
- ofs += face_valence*sizeof(float);
- ofs += sizeof(vertex_level);
- ofs += sizeof(edge_level);
- ofs += sizeof(eval_start_index);
- ofs += sizeof(eval_unique_identifier);
- ofs += sizeof(vtx);
- ofs += edge_valence*sizeof(Vertex);
- return ofs;
- }
-
- template<typename Ty>
- static __forceinline void store(char* ptr, size_t& ofs, const Ty& v) {
- *(Ty*)&ptr[ofs] = v; ofs += sizeof(Ty);
- }
-
- template<typename Ty>
- static __forceinline void load(char* ptr, size_t& ofs, Ty& v) {
- v = *(Ty*)&ptr[ofs]; ofs += sizeof(Ty);
- }
-
- /*! serializes the ring to some memory location */
- __forceinline void serialize(char* ptr, size_t& ofs) const
- {
- store(ptr,ofs,border_index);
- store(ptr,ofs,face_valence);
- store(ptr,ofs,vertex_crease_weight);
- for (size_t i=0; i<face_valence; i++)
- store(ptr,ofs,crease_weight[i]);
- store(ptr,ofs,vertex_level);
- store(ptr,ofs,edge_level);
- store(ptr,ofs,eval_start_index);
- store(ptr,ofs,eval_unique_identifier);
- Vertex_t::storeu(&ptr[ofs],vtx); ofs += sizeof(Vertex);
- for (size_t i=0; i<edge_valence; i++) {
- Vertex_t::storeu(&ptr[ofs],ring[i]); ofs += sizeof(Vertex);
- }
- }
-
- /*! deserializes the ring from some memory location */
- __forceinline void deserialize(char* ptr, size_t& ofs)
- {
- load(ptr,ofs,border_index);
- load(ptr,ofs,face_valence);
- edge_valence = 2*face_valence;
- load(ptr,ofs,vertex_crease_weight);
- for (size_t i=0; i<face_valence; i++)
- load(ptr,ofs,crease_weight[i]);
- load(ptr,ofs,vertex_level);
- load(ptr,ofs,edge_level);
- load(ptr,ofs,eval_start_index);
- load(ptr,ofs,eval_unique_identifier);
- vtx = Vertex_t::loadu(&ptr[ofs]); ofs += sizeof(Vertex);
- for (size_t i=0; i<edge_valence; i++) {
- ring[i] = Vertex_t::loadu(&ptr[ofs]); ofs += sizeof(Vertex);
- }
- }
-
- __forceinline bool hasBorder() const {
- return border_index != -1;
- }
-
- __forceinline const Vertex& front(size_t i) const {
- assert(edge_valence>i);
- return ring[i];
- }
-
- __forceinline const Vertex& back(size_t i) const {
- assert(edge_valence>=i);
- return ring[edge_valence-i];
- }
-
- __forceinline bool has_last_face() const {
- return (size_t)border_index != (size_t)edge_valence-2;
- }
-
- __forceinline bool has_opposite_front(size_t i) const {
- return (size_t)border_index != 2*i;
- }
-
- __forceinline bool has_opposite_back(size_t i) const {
- return (size_t)border_index != ((size_t)edge_valence-2-2*i);
- }
-
- __forceinline BBox3fa bounds() const
- {
- BBox3fa bounds ( vtx );
- for (size_t i = 0; i<edge_valence ; i++)
- bounds.extend( ring[i] );
- return bounds;
- }
-
- /*! initializes the ring from the half edge structure */
- __forceinline void init(const HalfEdge* const h, const char* vertices, size_t stride)
- {
- border_index = -1;
- vtx = Vertex_t::loadu(vertices+h->getStartVertexIndex()*stride);
- vertex_crease_weight = h->vertex_crease_weight;
-
- HalfEdge* p = (HalfEdge*) h;
-
- unsigned i=0;
- unsigned min_vertex_index = (unsigned)-1;
- unsigned min_vertex_index_face = (unsigned)-1;
- edge_level = p->edge_level;
- vertex_level = 0.0f;
-
- do
- {
- vertex_level = max(vertex_level,p->edge_level);
- crease_weight[i/2] = p->edge_crease_weight;
- assert(p->hasOpposite() || p->edge_crease_weight == float(inf));
-
- /* store first two vertices of face */
- p = p->next();
- const unsigned index0 = p->getStartVertexIndex();
- ring[i++] = Vertex_t::loadu(vertices+index0*stride);
- if (index0 < min_vertex_index) { min_vertex_index = index0; min_vertex_index_face = i>>1; }
- p = p->next();
-
- const unsigned index1 = p->getStartVertexIndex();
- ring[i++] = Vertex_t::loadu(vertices+index1*stride);
- p = p->next();
-
- /* continue with next face */
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else
- {
- /* find minimum start vertex */
- const unsigned index0 = p->getStartVertexIndex();
- if (index0 < min_vertex_index) { min_vertex_index = index0; min_vertex_index_face = i>>1; }
-
- /*! mark first border edge and store dummy vertex for face between the two border edges */
- border_index = i;
- crease_weight[i/2] = inf;
- ring[i++] = Vertex_t::loadu(vertices+index0*stride);
- ring[i++] = vtx; // dummy vertex
-
- /*! goto other side of border */
- p = (HalfEdge*) h;
- while (p->hasOpposite())
- p = p->opposite()->next();
- }
-
- } while (p != h);
-
- edge_valence = i;
- face_valence = i >> 1;
- eval_unique_identifier = min_vertex_index;
- eval_start_index = min_vertex_index_face;
-
- assert( hasValidPositions() );
- }
-
- __forceinline void subdivide(CatmullClark1RingT& dest) const
- {
- dest.edge_level = 0.5f*edge_level;
- dest.vertex_level = 0.5f*vertex_level;
- dest.face_valence = face_valence;
- dest.edge_valence = edge_valence;
- dest.border_index = border_index;
- dest.vertex_crease_weight = max(0.0f,vertex_crease_weight-1.0f);
- dest.eval_start_index = eval_start_index;
- dest.eval_unique_identifier = eval_unique_identifier;
-
- /* calculate face points */
- Vertex_t S = Vertex_t(0.0f);
- for (size_t i=0; i<face_valence; i++)
- {
- size_t face_index = i + eval_start_index; if (face_index >= face_valence) face_index -= face_valence; assert(face_index < face_valence);
- size_t index0 = 2*face_index+0; if (index0 >= edge_valence) index0 -= edge_valence; assert(index0 < edge_valence);
- size_t index1 = 2*face_index+1; if (index1 >= edge_valence) index1 -= edge_valence; assert(index1 < edge_valence);
- size_t index2 = 2*face_index+2; if (index2 >= edge_valence) index2 -= edge_valence; assert(index2 < edge_valence);
- S += dest.ring[index1] = ((vtx + ring[index1]) + (ring[index0] + ring[index2])) * 0.25f;
- }
-
- /* calculate new edge points */
- size_t num_creases = 0;
- array_t<size_t,MAX_RING_FACE_VALENCE> crease_id;
-
- for (size_t i=0; i<face_valence; i++)
- {
- size_t face_index = i + eval_start_index;
- if (face_index >= face_valence) face_index -= face_valence;
- const float edge_crease = crease_weight[face_index];
- dest.crease_weight[face_index] = max(edge_crease-1.0f,0.0f);
-
- size_t index = 2*face_index;
- size_t prev_index = face_index == 0 ? edge_valence-1 : 2*face_index-1;
- size_t next_index = 2*face_index+1;
-
- const Vertex_t v = vtx + ring[index];
- const Vertex_t f = dest.ring[prev_index] + dest.ring[next_index];
- S += ring[index];
-
- /* fast path for regular edge points */
- if (likely(edge_crease <= 0.0f)) {
- dest.ring[index] = (v+f) * 0.25f;
- }
-
- /* slower path for hard edge rule */
- else {
- crease_id[num_creases++] = face_index;
- dest.ring[index] = v*0.5f;
-
- /* even slower path for blended edge rule */
- if (unlikely(edge_crease < 1.0f)) {
- dest.ring[index] = lerp((v+f)*0.25f,v*0.5f,edge_crease);
- }
- }
- }
-
- /* compute new vertex using smooth rule */
- const float inv_face_valence = 1.0f / (float)face_valence;
- const Vertex_t v_smooth = (Vertex_t) madd(inv_face_valence,S,(float(face_valence)-2.0f)*vtx)*inv_face_valence;
- dest.vtx = v_smooth;
-
- /* compute new vertex using vertex_crease_weight rule */
- if (unlikely(vertex_crease_weight > 0.0f))
- {
- if (vertex_crease_weight >= 1.0f) {
- dest.vtx = vtx;
- } else {
- dest.vtx = lerp(v_smooth,vtx,vertex_crease_weight);
- }
- return;
- }
-
- /* no edge crease rule and dart rule */
- if (likely(num_creases <= 1))
- return;
-
- /* compute new vertex using crease rule */
- if (likely(num_creases == 2))
- {
- /* update vertex using crease rule */
- const size_t crease0 = crease_id[0], crease1 = crease_id[1];
- const Vertex_t v_sharp = (Vertex_t)(ring[2*crease0] + 6.0f*vtx + ring[2*crease1]) * (1.0f / 8.0f);
- dest.vtx = v_sharp;
-
- /* update crease_weights using chaikin rule */
- const float crease_weight0 = crease_weight[crease0], crease_weight1 = crease_weight[crease1];
- dest.crease_weight[crease0] = max(0.25f*(3.0f*crease_weight0 + crease_weight1)-1.0f,0.0f);
- dest.crease_weight[crease1] = max(0.25f*(3.0f*crease_weight1 + crease_weight0)-1.0f,0.0f);
-
- /* interpolate between sharp and smooth rule */
- const float v_blend = 0.5f*(crease_weight0+crease_weight1);
- if (unlikely(v_blend < 1.0f)) {
- dest.vtx = lerp(v_smooth,v_sharp,v_blend);
- }
- }
-
- /* compute new vertex using corner rule */
- else {
- dest.vtx = vtx;
- }
- }
-
- __forceinline bool isRegular1() const
- {
- if (border_index == -1) {
- if (face_valence == 4) return true;
- } else {
- if (face_valence < 4) return true;
- }
- return false;
- }
-
- __forceinline size_t numEdgeCreases() const
- {
- ssize_t numCreases = 0;
- for (size_t i=0; i<face_valence; i++) {
- numCreases += crease_weight[i] > 0.0f;
- }
- return numCreases;
- }
-
- enum Type {
- TYPE_NONE = 0, //!< invalid type
- TYPE_REGULAR = 1, //!< regular patch when ignoring creases
- TYPE_REGULAR_CREASES = 2, //!< regular patch when considering creases
- TYPE_GREGORY = 4, //!< gregory patch when ignoring creases
- TYPE_GREGORY_CREASES = 8, //!< gregory patch when considering creases
- TYPE_CREASES = 16 //!< patch has crease features
- };
-
- __forceinline Type type() const
- {
- /* check if there is an edge crease anywhere */
- const size_t numCreases = numEdgeCreases();
- const bool noInnerCreases = hasBorder() ? numCreases == 2 : numCreases == 0;
-
- Type crease_mask = (Type) (TYPE_REGULAR | TYPE_GREGORY);
- if (noInnerCreases ) crease_mask = (Type) (crease_mask | TYPE_REGULAR_CREASES | TYPE_GREGORY_CREASES);
- if (numCreases != 0) crease_mask = (Type) (crease_mask | TYPE_CREASES);
-
- /* calculate if this vertex is regular */
- bool hasBorder = border_index != -1;
- if (face_valence == 2 && hasBorder) {
- if (vertex_crease_weight == 0.0f ) return (Type) (crease_mask & (TYPE_REGULAR | TYPE_REGULAR_CREASES | TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES));
- else if (vertex_crease_weight == float(inf)) return (Type) (crease_mask & (TYPE_REGULAR | TYPE_REGULAR_CREASES | TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES));
- else return TYPE_CREASES;
- }
- else if (vertex_crease_weight != 0.0f) return TYPE_CREASES;
- else if (face_valence == 3 && hasBorder) return (Type) (crease_mask & (TYPE_REGULAR | TYPE_REGULAR_CREASES | TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES));
- else if (face_valence == 4 && !hasBorder) return (Type) (crease_mask & (TYPE_REGULAR | TYPE_REGULAR_CREASES | TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES));
- else return (Type) (crease_mask & (TYPE_GREGORY | TYPE_GREGORY_CREASES | TYPE_CREASES));
- }
-
- __forceinline bool isFinalResolution(float res) const {
- return vertex_level <= res;
- }
-
- /* computes the limit vertex */
- __forceinline Vertex getLimitVertex() const
- {
- /* return hard corner */
- if (unlikely(std::isinf(vertex_crease_weight)))
- return vtx;
-
- /* border vertex rule */
- if (unlikely(border_index != -1))
- {
- const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
- return (4.0f * vtx + (ring[border_index] + ring[second_border_index])) * 1.0f/6.0f;
- }
-
- Vertex_t F( 0.0f );
- Vertex_t E( 0.0f );
-
- assert(eval_start_index < face_valence);
-
- for (size_t i=0; i<face_valence; i++) {
- size_t index = i+eval_start_index;
- if (index >= face_valence) index -= face_valence;
- F += ring[2*index+1];
- E += ring[2*index];
- }
-
- const float n = (float)face_valence;
- return (Vertex_t)(n*n*vtx+4.0f*E+F) / ((n+5.0f)*n);
- }
-
- /* gets limit tangent in the direction of egde vtx -> ring[0] */
- __forceinline Vertex getLimitTangent() const
- {
- if (unlikely(std::isinf(vertex_crease_weight)))
- return ring[0] - vtx;
-
- /* border vertex rule */
- if (unlikely(border_index != -1))
- {
- if (border_index != (int)edge_valence-2 ) {
- return ring[0] - vtx;
- }
- else
- {
- const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
- return (ring[second_border_index] - ring[border_index]) * 0.5f;
- }
- }
-
- Vertex_t alpha( 0.0f );
- Vertex_t beta ( 0.0f );
-
- const size_t n = face_valence;
-
- assert(eval_start_index < face_valence);
-
- Vertex_t q( 0.0f );
- for (size_t i=0; i<face_valence; i++)
- {
- size_t index = i+eval_start_index;
- if (index >= face_valence) index -= face_valence;
- const float a = CatmullClarkPrecomputedCoefficients::table.limittangent_a(index,n);
- const float b = CatmullClarkPrecomputedCoefficients::table.limittangent_b(index,n);
- alpha += a * ring[2*index];
- beta += b * ring[2*index+1];
- }
-
- const float sigma = CatmullClarkPrecomputedCoefficients::table.limittangent_c(n);
- return sigma * (alpha + beta);
- }
-
- /* gets limit tangent in the direction of egde vtx -> ring[edge_valence-2] */
- __forceinline Vertex getSecondLimitTangent() const
- {
- if (unlikely(std::isinf(vertex_crease_weight)))
- return ring[2] - vtx;
-
- /* border vertex rule */
- if (unlikely(border_index != -1))
- {
- if (border_index != 2) {
- return ring[2] - vtx;
- }
- else {
- const unsigned int second_border_index = border_index+2 >= int(edge_valence) ? 0 : border_index+2;
- return (ring[border_index] - ring[second_border_index]) * 0.5f;
- }
- }
-
- Vertex_t alpha( 0.0f );
- Vertex_t beta ( 0.0f );
-
- const size_t n = face_valence;
-
- assert(eval_start_index < face_valence);
-
- for (size_t i=0; i<face_valence; i++)
- {
- size_t index = i+eval_start_index;
- if (index >= face_valence) index -= face_valence;
-
- size_t prev_index = index == 0 ? face_valence-1 : index-1; // need to be bit-wise exact in cosf eval
- const float a = CatmullClarkPrecomputedCoefficients::table.limittangent_a(prev_index,n);
- const float b = CatmullClarkPrecomputedCoefficients::table.limittangent_b(prev_index,n);
- alpha += a * ring[2*index];
- beta += b * ring[2*index+1];
- }
-
- const float sigma = CatmullClarkPrecomputedCoefficients::table.limittangent_c(n);
- return sigma* (alpha + beta);
- }
-
- /* gets surface normal */
- const Vertex getNormal() const {
- return cross(getLimitTangent(),getSecondLimitTangent());
- }
-
- /* returns center of the n-th quad in the 1-ring */
- __forceinline Vertex getQuadCenter(const size_t index) const
- {
- const Vertex_t &p0 = vtx;
- const Vertex_t &p1 = ring[2*index+0];
- const Vertex_t &p2 = ring[2*index+1];
- const Vertex_t &p3 = index == face_valence-1 ? ring[0] : ring[2*index+2];
- const Vertex p = (p0+p1+p2+p3) * 0.25f;
- return p;
- }
-
- /* returns center of the n-th edge in the 1-ring */
- __forceinline Vertex getEdgeCenter(const size_t index) const {
- return (vtx + ring[index*2]) * 0.5f;
- }
-
- bool hasValidPositions() const
- {
- for (size_t i=0; i<edge_valence; i++) {
- if (!isvalid(ring[i]))
- return false;
- }
- return true;
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream o, const CatmullClark1RingT &c)
- {
- o << "vtx " << c.vtx << " size = " << c.edge_valence << ", " <<
- "hard_edge = " << c.border_index << ", face_valence " << c.face_valence <<
- ", edge_level = " << c.edge_level << ", vertex_level = " << c.vertex_level << ", eval_start_index: " << c.eval_start_index << ", ring: " << embree_endl;
-
- for (unsigned int i=0; i<min(c.edge_valence,(unsigned int)MAX_RING_FACE_VALENCE); i++) {
- o << i << " -> " << c.ring[i];
- if (i % 2 == 0) o << " crease = " << c.crease_weight[i/2];
- o << embree_endl;
- }
- return o;
- }
- };
-
- typedef CatmullClark1RingT<Vec3fa,Vec3fa_t> CatmullClark1Ring3fa;
-
- template<typename Vertex, typename Vertex_t = Vertex>
- struct __aligned(64) GeneralCatmullClark1RingT
- {
- ALIGNED_STRUCT_(64);
-
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
-
- struct Face
- {
- __forceinline Face() {}
- __forceinline Face (int size, float crease_weight)
- : size(size), crease_weight(crease_weight) {}
-
- // FIXME: add member that returns total number of vertices
-
- int size; // number of vertices-2 of nth face in ring
- float crease_weight;
- };
-
- Vertex vtx;
- DynamicStackArray<Vertex,32,MAX_RING_EDGE_VALENCE> ring;
- DynamicStackArray<Face,16,MAX_RING_FACE_VALENCE> faces;
- unsigned int face_valence;
- unsigned int edge_valence;
- int border_face;
- float vertex_crease_weight;
- float vertex_level; //!< maximum level of adjacent edges
- float edge_level; // level of first edge
- bool only_quads; // true if all faces are quads
- unsigned int eval_start_face_index;
- unsigned int eval_start_vertex_index;
- unsigned int eval_unique_identifier;
-
- public:
- GeneralCatmullClark1RingT()
- : eval_start_face_index(0), eval_start_vertex_index(0), eval_unique_identifier(0) {}
-
- __forceinline bool isRegular() const
- {
- if (border_face == -1 && face_valence == 4) return true;
- return false;
- }
-
- __forceinline bool has_last_face() const {
- return border_face != (int)face_valence-1;
- }
-
- __forceinline bool has_second_face() const {
- return (border_face == -1) || (border_face >= 2);
- }
-
- bool hasValidPositions() const
- {
- for (size_t i=0; i<edge_valence; i++) {
- if (!isvalid(ring[i]))
- return false;
- }
- return true;
- }
-
- __forceinline void init(const HalfEdge* const h, const char* vertices, size_t stride)
- {
- only_quads = true;
- border_face = -1;
- vtx = Vertex_t::loadu(vertices+h->getStartVertexIndex()*stride);
- vertex_crease_weight = h->vertex_crease_weight;
- HalfEdge* p = (HalfEdge*) h;
-
- unsigned int e=0, f=0;
- unsigned min_vertex_index = (unsigned)-1;
- unsigned min_vertex_index_face = (unsigned)-1;
- unsigned min_vertex_index_vertex = (unsigned)-1;
- edge_level = p->edge_level;
- vertex_level = 0.0f;
- do
- {
- HalfEdge* p_prev = p->prev();
- HalfEdge* p_next = p->next();
- const float crease_weight = p->edge_crease_weight;
- assert(p->hasOpposite() || p->edge_crease_weight == float(inf));
- vertex_level = max(vertex_level,p->edge_level);
-
- /* find minimum start vertex */
- unsigned vertex_index = p_next->getStartVertexIndex();
- if (vertex_index < min_vertex_index) { min_vertex_index = vertex_index; min_vertex_index_face = f; min_vertex_index_vertex = e; }
-
- /* store first N-2 vertices of face */
- unsigned int vn = 0;
- for (p = p_next; p!=p_prev; p=p->next()) {
- ring[e++] = Vertex_t::loadu(vertices+p->getStartVertexIndex()*stride);
- vn++;
- }
- faces[f++] = Face(vn,crease_weight);
- only_quads &= (vn == 2);
-
- /* continue with next face */
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else
- {
- /* find minimum start vertex */
- unsigned vertex_index = p->getStartVertexIndex();
- if (vertex_index < min_vertex_index) { min_vertex_index = vertex_index; min_vertex_index_face = f; min_vertex_index_vertex = e; }
-
- /*! mark first border edge and store dummy vertex for face between the two border edges */
- border_face = f;
- faces[f++] = Face(2,inf);
- ring[e++] = Vertex_t::loadu(vertices+p->getStartVertexIndex()*stride);
- ring[e++] = vtx; // dummy vertex
-
- /*! goto other side of border */
- p = (HalfEdge*) h;
- while (p->hasOpposite())
- p = p->opposite()->next();
- }
-
- } while (p != h);
-
- edge_valence = e;
- face_valence = f;
- eval_unique_identifier = min_vertex_index;
- eval_start_face_index = min_vertex_index_face;
- eval_start_vertex_index = min_vertex_index_vertex;
-
- assert( hasValidPositions() );
- }
-
- __forceinline void subdivide(CatmullClark1Ring& dest) const
- {
- dest.edge_level = 0.5f*edge_level;
- dest.vertex_level = 0.5f*vertex_level;
- dest.face_valence = face_valence;
- dest.edge_valence = 2*face_valence;
- dest.border_index = border_face == -1 ? -1 : 2*border_face; // FIXME:
- dest.vertex_crease_weight = max(0.0f,vertex_crease_weight-1.0f);
- dest.eval_start_index = eval_start_face_index;
- dest.eval_unique_identifier = eval_unique_identifier;
- assert(dest.face_valence <= MAX_RING_FACE_VALENCE);
-
- /* calculate face points */
- Vertex_t S = Vertex_t(0.0f);
- for (size_t face=0, v=eval_start_vertex_index; face<face_valence; face++) {
- size_t f = (face + eval_start_face_index)%face_valence;
-
- Vertex_t F = vtx;
- for (size_t k=v; k<=v+faces[f].size; k++) F += ring[k%edge_valence]; // FIXME: optimize
- S += dest.ring[2*f+1] = F/float(faces[f].size+2);
- v+=faces[f].size;
- v%=edge_valence;
- }
-
- /* calculate new edge points */
- size_t num_creases = 0;
- array_t<size_t,MAX_RING_FACE_VALENCE> crease_id;
- Vertex_t C = Vertex_t(0.0f);
- for (size_t face=0, j=eval_start_vertex_index; face<face_valence; face++)
- {
- size_t i = (face + eval_start_face_index)%face_valence;
-
- const Vertex_t v = vtx + ring[j];
- Vertex_t f = dest.ring[2*i+1];
- if (i == 0) f += dest.ring[dest.edge_valence-1];
- else f += dest.ring[2*i-1];
- S += ring[j];
- dest.crease_weight[i] = max(faces[i].crease_weight-1.0f,0.0f);
-
- /* fast path for regular edge points */
- if (likely(faces[i].crease_weight <= 0.0f)) {
- dest.ring[2*i] = (v+f) * 0.25f;
- }
-
- /* slower path for hard edge rule */
- else {
- C += ring[j]; crease_id[num_creases++] = i;
- dest.ring[2*i] = v*0.5f;
-
- /* even slower path for blended edge rule */
- if (unlikely(faces[i].crease_weight < 1.0f)) {
- dest.ring[2*i] = lerp((v+f)*0.25f,v*0.5f,faces[i].crease_weight);
- }
- }
- j+=faces[i].size;
- j%=edge_valence;
- }
-
- /* compute new vertex using smooth rule */
- const float inv_face_valence = 1.0f / (float)face_valence;
- const Vertex_t v_smooth = (Vertex_t) madd(inv_face_valence,S,(float(face_valence)-2.0f)*vtx)*inv_face_valence;
- dest.vtx = v_smooth;
-
- /* compute new vertex using vertex_crease_weight rule */
- if (unlikely(vertex_crease_weight > 0.0f))
- {
- if (vertex_crease_weight >= 1.0f) {
- dest.vtx = vtx;
- } else {
- dest.vtx = lerp(vtx,v_smooth,vertex_crease_weight);
- }
- return;
- }
-
- if (likely(num_creases <= 1))
- return;
-
- /* compute new vertex using crease rule */
- if (likely(num_creases == 2)) {
- const Vertex_t v_sharp = (Vertex_t)(C + 6.0f * vtx) * (1.0f / 8.0f);
- const float crease_weight0 = faces[crease_id[0]].crease_weight;
- const float crease_weight1 = faces[crease_id[1]].crease_weight;
- dest.vtx = v_sharp;
- dest.crease_weight[crease_id[0]] = max(0.25f*(3.0f*crease_weight0 + crease_weight1)-1.0f,0.0f);
- dest.crease_weight[crease_id[1]] = max(0.25f*(3.0f*crease_weight1 + crease_weight0)-1.0f,0.0f);
- const float v_blend = 0.5f*(crease_weight0+crease_weight1);
- if (unlikely(v_blend < 1.0f)) {
- dest.vtx = lerp(v_sharp,v_smooth,v_blend);
- }
- }
-
- /* compute new vertex using corner rule */
- else {
- dest.vtx = vtx;
- }
- }
-
- void convert(CatmullClark1Ring& dst) const
- {
- dst.edge_level = edge_level;
- dst.vertex_level = vertex_level;
- dst.vtx = vtx;
- dst.face_valence = face_valence;
- dst.edge_valence = 2*face_valence;
- dst.border_index = border_face == -1 ? -1 : 2*border_face;
- for (size_t i=0; i<face_valence; i++)
- dst.crease_weight[i] = faces[i].crease_weight;
- dst.vertex_crease_weight = vertex_crease_weight;
- for (size_t i=0; i<edge_valence; i++) dst.ring[i] = ring[i];
-
- dst.eval_start_index = eval_start_face_index;
- dst.eval_unique_identifier = eval_unique_identifier;
-
- assert( dst.hasValidPositions() );
- }
-
-
- /* gets limit tangent in the direction of egde vtx -> ring[0] */
- __forceinline Vertex getLimitTangent() const
- {
- CatmullClark1Ring cc_vtx;
-
- /* fast path for quad only rings */
- if (only_quads)
- {
- convert(cc_vtx);
- return cc_vtx.getLimitTangent();
- }
-
- subdivide(cc_vtx);
- return 2.0f * cc_vtx.getLimitTangent();
- }
-
- /* gets limit tangent in the direction of egde vtx -> ring[edge_valence-2] */
- __forceinline Vertex getSecondLimitTangent() const
- {
- CatmullClark1Ring cc_vtx;
-
- /* fast path for quad only rings */
- if (only_quads)
- {
- convert(cc_vtx);
- return cc_vtx.getSecondLimitTangent();
- }
-
- subdivide(cc_vtx);
- return 2.0f * cc_vtx.getSecondLimitTangent();
- }
-
-
- /* gets limit vertex */
- __forceinline Vertex getLimitVertex() const
- {
- CatmullClark1Ring cc_vtx;
-
- /* fast path for quad only rings */
- if (only_quads)
- convert(cc_vtx);
- else
- subdivide(cc_vtx);
- return cc_vtx.getLimitVertex();
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream o, const GeneralCatmullClark1RingT &c)
- {
- o << "vtx " << c.vtx << " size = " << c.edge_valence << ", border_face = " << c.border_face << ", " << " face_valence = " << c.face_valence <<
- ", edge_level = " << c.edge_level << ", vertex_level = " << c.vertex_level << ", ring: " << embree_endl;
- for (size_t v=0, f=0; f<c.face_valence; v+=c.faces[f++].size) {
- for (size_t i=v; i<v+c.faces[f].size; i++) {
- o << i << " -> " << c.ring[i];
- if (i == v) o << " crease = " << c.faces[f].crease_weight;
- o << embree_endl;
- }
- }
- return o;
- }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h
deleted file mode 100644
index b244af481c..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h
+++ /dev/null
@@ -1,296 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/scene_curves.h"
-
-/*
-
- Implements Catmul Rom curves with control points p0, p1, p2, p3. At
- t=0 the curve goes through p1, with tangent (p2-p0)/3, and for t=1
- the curve goes through p2 with tangent (p3-p2)/2.
-
- */
-
-namespace embree
-{
- class CatmullRomBasis
- {
- public:
-
- template<typename T>
- static __forceinline Vec4<T> eval(const T& u)
- {
- const T t = u;
- const T s = T(1.0f) - u;
- const T n0 = - t * s * s;
- const T n1 = 2.0f + t * t * (3.0f * t - 5.0f);
- const T n2 = 2.0f + s * s * (3.0f * s - 5.0f);
- const T n3 = - s * t * t;
- return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative(const T& u)
- {
- const T t = u;
- const T s = 1.0f - u;
- const T n0 = - s * s + 2.0f * s * t;
- const T n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t;
- const T n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s;
- const T n3 = -2.0f * s * t + t * t;
- return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
- }
-
- template<typename T>
- static __forceinline Vec4<T> derivative2(const T& u)
- {
- const T t = u;
- const T n0 = -3.0f * t + 2.0f;
- const T n1 = 9.0f * t - 5.0f;
- const T n2 = -9.0f * t + 4.0f;
- const T n3 = 3.0f * t - 1.0f;
- return Vec4<T>(n0, n1, n2, n3);
- }
- };
-
- struct PrecomputedCatmullRomBasis
- {
- enum { N = 16 };
- public:
- PrecomputedCatmullRomBasis() {}
- PrecomputedCatmullRomBasis(int shift);
-
- /* basis for bspline evaluation */
- public:
- float c0[N+1][N+1];
- float c1[N+1][N+1];
- float c2[N+1][N+1];
- float c3[N+1][N+1];
-
- /* basis for bspline derivative evaluation */
- public:
- float d0[N+1][N+1];
- float d1[N+1][N+1];
- float d2[N+1][N+1];
- float d3[N+1][N+1];
- };
- extern PrecomputedCatmullRomBasis catmullrom_basis0;
- extern PrecomputedCatmullRomBasis catmullrom_basis1;
-
- template<typename Vertex>
- struct CatmullRomCurveT
- {
- Vertex v0,v1,v2,v3;
-
- __forceinline CatmullRomCurveT() {}
-
- __forceinline CatmullRomCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
- : v0(v0), v1(v1), v2(v2), v3(v3) {}
-
- __forceinline Vertex begin() const {
- return madd(1.0f/6.0f,v0,madd(2.0f/3.0f,v1,1.0f/6.0f*v2));
- }
-
- __forceinline Vertex end() const {
- return madd(1.0f/6.0f,v1,madd(2.0f/3.0f,v2,1.0f/6.0f*v3));
- }
-
- __forceinline Vertex center() const {
- return 0.25f*(v0+v1+v2+v3);
- }
-
- __forceinline BBox<Vertex> bounds() const {
- return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
- }
-
- __forceinline friend CatmullRomCurveT operator -( const CatmullRomCurveT& a, const Vertex& b ) {
- return CatmullRomCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
- }
-
- __forceinline CatmullRomCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
- {
- const Vec3ff q0(xfmVector(space,v0-p), v0.w);
- const Vec3ff q1(xfmVector(space,v1-p), v1.w);
- const Vec3ff q2(xfmVector(space,v2-p), v2.w);
- const Vec3ff q3(xfmVector(space,v3-p), v3.w);
- return CatmullRomCurveT<Vec3ff>(q0,q1,q2,q3);
- }
-
- __forceinline Vertex eval(const float t) const
- {
- const Vec4<float> b = CatmullRomBasis::eval(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline Vertex eval_du(const float t) const
- {
- const Vec4<float> b = CatmullRomBasis::derivative(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline Vertex eval_dudu(const float t) const
- {
- const Vec4<float> b = CatmullRomBasis::derivative2(t);
- return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
- }
-
- __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
- {
- p = eval(t);
- dp = eval_du(t);
- ddp = eval_dudu(t);
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = CatmullRomBasis::eval(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = CatmullRomBasis::derivative(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
- {
- const Vec4vf<M> b = CatmullRomBasis::derivative2(t);
- return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
- {
- p = veval(t);
- dp = veval_du(t);
- }
-
- template<int M>
- __forceinline Vec4vf<M> eval0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedCatmullRomBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&catmullrom_basis0.c0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&catmullrom_basis0.c1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&catmullrom_basis0.c2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&catmullrom_basis0.c3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> eval1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedCatmullRomBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&catmullrom_basis1.c0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&catmullrom_basis1.c1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&catmullrom_basis1.c2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&catmullrom_basis1.c3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> derivative0(const int ofs, const int size) const
- {
- assert(size <= PrecomputedCatmullRomBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&catmullrom_basis0.d0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&catmullrom_basis0.d1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&catmullrom_basis0.d2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&catmullrom_basis0.d3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- template<int M>
- __forceinline Vec4vf<M> derivative1(const int ofs, const int size) const
- {
- assert(size <= PrecomputedCatmullRomBasis::N);
- assert(ofs <= size);
- return madd(vfloat<M>::loadu(&catmullrom_basis1.d0[size][ofs]), Vec4vf<M>(v0),
- madd(vfloat<M>::loadu(&catmullrom_basis1.d1[size][ofs]), Vec4vf<M>(v1),
- madd(vfloat<M>::loadu(&catmullrom_basis1.d2[size][ofs]), Vec4vf<M>(v2),
- vfloat<M>::loadu(&catmullrom_basis1.d3[size][ofs]) * Vec4vf<M>(v3))));
- }
-
- /* calculates bounds of catmull-rom curve geometry */
- __forceinline BBox3fa accurateRoundBounds() const
- {
- const int N = 7;
- const float scale = 1.0f/(3.0f*(N-1));
- Vec4vfx pl(pos_inf), pu(neg_inf);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vintx vi = vintx(i)+vintx(step);
- vboolx valid = vi <= vintx(N);
- const Vec4vfx p = eval0<VSIZEX>(i,N);
- const Vec4vfx dp = derivative0<VSIZEX>(i,N);
- const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
- const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
- pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
- pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const float r_min = reduce_min(pl.w);
- const float r_max = reduce_max(pu.w);
- const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
- return enlarge(BBox3fa(lower,upper),upper_r);
- }
-
- /* calculates bounds when tessellated into N line segments */
- __forceinline BBox3fa accurateFlatBounds(int N) const
- {
- if (likely(N == 4))
- {
- const Vec4vf4 pi = eval0<4>(0,4);
- const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
- const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
- const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
- const Vec3ff pe = end();
- return enlarge(BBox3fa(min(lower,pe),max(upper,pe)),max(upper_r,Vec3fa(abs(pe.w))));
- }
- else
- {
- Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
- for (int i=0; i<=N; i+=VSIZEX)
- {
- vboolx valid = vintx(i)+vintx(step) <= vintx(N);
- const Vec4vfx pi = eval0<VSIZEX>(i,N);
-
- pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
- pl.y = select(valid,min(pl.y,pi.y),pl.y);
- pl.z = select(valid,min(pl.z,pi.z),pl.z);
-
- pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
- pu.y = select(valid,max(pu.y,pi.y),pu.y);
- pu.z = select(valid,max(pu.z,pi.z),pu.z);
-
- ru = select(valid,max(ru,abs(pi.w)),ru);
- }
- const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
- const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
- const Vec3fa upper_r(reduce_max(ru));
- return enlarge(BBox3fa(lower,upper),upper_r);
- }
- }
-
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const CatmullRomCurveT& curve) {
- return cout << "CatmullRomCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
- }
- };
-
- __forceinline CatmullRomCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CatmullRomCurveT<Vec3ff>& curve)
- {
- return CatmullRomCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
- enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
- }
-
- typedef CatmullRomCurveT<Vec3fa> CatmullRomCurve3fa;
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h b/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h
deleted file mode 100644
index 23f24c360c..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename Vertex, typename Vertex_t = Vertex>
- struct FeatureAdaptiveEval
- {
- public:
-
- typedef PatchT<Vertex,Vertex_t> Patch;
- typedef typename Patch::Ref Ref;
- typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
- typedef BSplinePatchT<Vertex,Vertex_t> BSplinePatch;
- typedef BezierPatchT<Vertex,Vertex_t> BezierPatch;
- typedef GregoryPatchT<Vertex,Vertex_t> GregoryPatch;
- typedef BilinearPatchT<Vertex,Vertex_t> BilinearPatch;
- typedef BezierCurveT<Vertex> BezierCurve;
-
- public:
-
- FeatureAdaptiveEval (const HalfEdge* edge, const char* vertices, size_t stride, const float u, const float v,
- Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv)
- {
- switch (edge->patch_type) {
- case HalfEdge::BILINEAR_PATCH: BilinearPatch(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
- case HalfEdge::REGULAR_QUAD_PATCH: RegularPatchT(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
-#if PATCH_USE_GREGORY == 2
- case HalfEdge::IRREGULAR_QUAD_PATCH: GregoryPatch(edge,vertices,stride).eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f); break;
-#endif
- default: {
- GeneralCatmullClarkPatch patch(edge,vertices,stride);
- eval(patch,Vec2f(u,v),0);
- break;
- }
- }
- }
-
- FeatureAdaptiveEval (CatmullClarkPatch& patch, const float u, const float v, float dscale, size_t depth,
- Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv)
- {
- eval(patch,Vec2f(u,v),dscale,depth);
- }
-
- void eval_general_quad(const GeneralCatmullClarkPatch& patch, array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE>& patches, const Vec2f& uv, size_t depth)
- {
- float u = uv.x, v = uv.y;
- if (v < 0.5f) {
- if (u < 0.5f) {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,0);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[0],Vec2f(2.0f*u,2.0f*v),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[0],Vec2f(2.0f*u,2.0f*v),2.0f,depth+1);
-#endif
- if (dPdu && dPdv) {
- const Vertex dpdx = *dPdu, dpdy = *dPdv;
- *dPdu = dpdx; *dPdv = dpdy;
- }
- }
- else {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,1);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[1],Vec2f(2.0f*v,2.0f-2.0f*u),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[1],Vec2f(2.0f*v,2.0f-2.0f*u),2.0f,depth+1);
-#endif
- if (dPdu && dPdv) {
- const Vertex dpdx = *dPdu, dpdy = *dPdv;
- *dPdu = -dpdy; *dPdv = dpdx;
- }
- }
- } else {
- if (u > 0.5f) {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,2);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[2],Vec2f(2.0f-2.0f*u,2.0f-2.0f*v),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[2],Vec2f(2.0f-2.0f*u,2.0f-2.0f*v),2.0f,depth+1);
-#endif
- if (dPdu && dPdv) {
- const Vertex dpdx = *dPdu, dpdy = *dPdv;
- *dPdu = -dpdx; *dPdv = -dpdy;
- }
- }
- else {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,3);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[3],Vec2f(2.0f-2.0f*v,2.0f*u),2.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[3],Vec2f(2.0f-2.0f*v,2.0f*u),2.0f,depth+1);
-#endif
- if (dPdu && dPdv) {
- const Vertex dpdx = *dPdu, dpdy = *dPdv;
- *dPdu = dpdy; *dPdv = -dpdx;
- }
- }
- }
- }
-
- __forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
- {
- const int max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR;
-//#if PATCH_MIN_RESOLUTION
-// return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=(size_t)max_eval_depth;
-//#else
- return depth>=(size_t)max_eval_depth;
-//#endif
- }
-
- void eval(CatmullClarkPatch& patch, Vec2f uv, float dscale, size_t depth,
- BezierCurve* border0 = nullptr, BezierCurve* border1 = nullptr, BezierCurve* border2 = nullptr, BezierCurve* border3 = nullptr)
- {
- while (true)
- {
- typename CatmullClarkPatch::Type ty = patch.type();
-
- if (unlikely(final(patch,ty,depth)))
- {
- if (ty & CatmullClarkRing::TYPE_REGULAR) {
- RegularPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(234423,c,c,-1);
- return;
- } else {
- IrregularFillPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(34534,c,-1,c);
- return;
- }
- }
- else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
- assert(depth > 0);
- RegularPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(43524,c,c,-1);
- return;
- }
-#if PATCH_USE_GREGORY == 2
- else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
- assert(depth > 0);
- GregoryPatch(patch,border0,border1,border2,border3).eval(uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(23498,c,-1,c);
- return;
- }
-#endif
- else
- {
- array_t<CatmullClarkPatch,4> patches;
- patch.subdivide(patches); // FIXME: only have to generate one of the patches
-
- const float u = uv.x, v = uv.y;
- if (v < 0.5f) {
- if (u < 0.5f) { patch = patches[0]; uv = Vec2f(2.0f*u,2.0f*v); dscale *= 2.0f; }
- else { patch = patches[1]; uv = Vec2f(2.0f*u-1.0f,2.0f*v); dscale *= 2.0f; }
- } else {
- if (u > 0.5f) { patch = patches[2]; uv = Vec2f(2.0f*u-1.0f,2.0f*v-1.0f); dscale *= 2.0f; }
- else { patch = patches[3]; uv = Vec2f(2.0f*u,2.0f*v-1.0f); dscale *= 2.0f; }
- }
- depth++;
- }
- }
- }
-
- void eval(const GeneralCatmullClarkPatch& patch, const Vec2f& uv, const size_t depth)
- {
- /* convert into standard quad patch if possible */
- if (likely(patch.isQuadPatch()))
- {
- CatmullClarkPatch qpatch; patch.init(qpatch);
- return eval(qpatch,uv,1.0f,depth);
- }
-
- /* subdivide patch */
- unsigned N;
- array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
- patch.subdivide(patches,N); // FIXME: only have to generate one of the patches
-
- /* parametrization for quads */
- if (N == 4)
- eval_general_quad(patch,patches,uv,depth);
-
- /* parametrization for arbitrary polygons */
- else
- {
- const unsigned l = (unsigned) floor(0.5f*uv.x); const float u = 2.0f*frac(0.5f*uv.x)-0.5f;
- const unsigned h = (unsigned) floor(0.5f*uv.y); const float v = 2.0f*frac(0.5f*uv.y)-0.5f;
- const unsigned i = 4*h+l; assert(i<N);
- if (i >= N) return;
-
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,i);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[i],Vec2f(u,v),1.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[i],Vec2f(u,v),1.0f,depth+1);
-#endif
- }
- }
-
- private:
- Vertex* const P;
- Vertex* const dPdu;
- Vertex* const dPdv;
- Vertex* const ddPdudu;
- Vertex* const ddPdvdv;
- Vertex* const ddPdudv;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h b/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h
deleted file mode 100644
index 76583b2e5d..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-#include "catmullclark_patch.h"
-#include "bspline_patch.h"
-#include "gregory_patch.h"
-#include "tessellation.h"
-
-namespace embree
-{
- namespace isa
- {
- struct FeatureAdaptiveEvalGrid
- {
- typedef CatmullClark1Ring3fa CatmullClarkRing;
- typedef CatmullClarkPatch3fa CatmullClarkPatch;
- typedef BilinearPatch3fa BilinearPatch;
- typedef BSplinePatch3fa BSplinePatch;
- typedef BezierPatch3fa BezierPatch;
- typedef GregoryPatch3fa GregoryPatch;
-
- private:
- const unsigned x0,x1;
- const unsigned y0,y1;
- const unsigned swidth,sheight;
- const float rcp_swidth, rcp_sheight;
- float* const Px;
- float* const Py;
- float* const Pz;
- float* const U;
- float* const V;
- float* const Nx;
- float* const Ny;
- float* const Nz;
- const unsigned dwidth;
- //const unsigned dheight;
- unsigned count;
-
-
- public:
- FeatureAdaptiveEvalGrid (const GeneralCatmullClarkPatch3fa& patch, unsigned subPatch,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- float* Px, float* Py, float* Pz, float* U, float* V,
- float* Nx, float* Ny, float* Nz,
- const unsigned dwidth, const unsigned dheight)
- : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
- Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
- {
- assert(swidth < (2<<20) && sheight < (2<<20));
- const BBox2f srange(Vec2f(0.0f,0.0f),Vec2f(float(swidth-1),float(sheight-1)));
- const BBox2f erange(Vec2f((float)x0,(float)y0),Vec2f((float)x1,(float)y1));
-
- /* convert into standard quad patch if possible */
- if (likely(patch.isQuadPatch()))
- {
- CatmullClarkPatch3fa qpatch; patch.init(qpatch);
- eval(qpatch, srange, erange, 0);
- assert(count == (x1-x0+1)*(y1-y0+1));
- return;
- }
-
- /* subdivide patch */
- unsigned N;
- array_t<CatmullClarkPatch3fa,GeneralCatmullClarkPatch3fa::SIZE> patches;
- patch.subdivide(patches,N);
-
- if (N == 4)
- {
- const Vec2f c = srange.center();
- const BBox2f srange0(srange.lower,c);
- const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
- const BBox2f srange2(c,srange.upper);
- const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
-
-#if PATCH_USE_GREGORY == 2
- BezierCurve3fa borders[GeneralCatmullClarkPatch3fa::SIZE]; patch.getLimitBorder(borders);
- BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve3fa border1l,border1r; borders[1].subdivide(border1l,border1r);
- BezierCurve3fa border2l,border2r; borders[2].subdivide(border2l,border2r);
- BezierCurve3fa border3l,border3r; borders[3].subdivide(border3l,border3r);
- GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
- eval(patches[0],srange0,intersect(srange0,erange),1,&border0l,nullptr,nullptr,&border3r);
- eval(patches[1],srange1,intersect(srange1,erange),1,&border0r,&border1l,nullptr,nullptr);
- eval(patches[2],srange2,intersect(srange2,erange),1,nullptr,&border1r,&border2l,nullptr);
- eval(patches[3],srange3,intersect(srange3,erange),1,nullptr,nullptr,&border2r,&border3l);
-#else
- GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
- eval(patches[0],srange0,intersect(srange0,erange),1);
- eval(patches[1],srange1,intersect(srange1,erange),1);
- eval(patches[2],srange2,intersect(srange2,erange),1);
- eval(patches[3],srange3,intersect(srange3,erange),1);
-#endif
- }
- else
- {
- assert(subPatch < N);
-
-#if PATCH_USE_GREGORY == 2
- BezierCurve3fa borders[2]; patch.getLimitBorder(borders,subPatch);
- BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve3fa border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval(patches[subPatch], srange, erange, 1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval(patches[subPatch], srange, erange, 1);
-#endif
-
- }
- assert(count == (x1-x0+1)*(y1-y0+1));
- }
-
- FeatureAdaptiveEvalGrid (const CatmullClarkPatch3fa& patch,
- const BBox2f& srange, const BBox2f& erange, const unsigned depth,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- float* Px, float* Py, float* Pz, float* U, float* V,
- float* Nx, float* Ny, float* Nz,
- const unsigned dwidth, const unsigned dheight)
- : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
- Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
- {
- eval(patch,srange,erange,depth);
- }
-
- template<typename Patch>
- void evalLocalGrid(const Patch& patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1)
- {
- const float scale_x = rcp(srange.upper.x-srange.lower.x);
- const float scale_y = rcp(srange.upper.y-srange.lower.y);
- count += (lx1-lx0)*(ly1-ly0);
-
-#if 0
- for (unsigned iy=ly0; iy<ly1; iy++) {
- for (unsigned ix=lx0; ix<lx1; ix++) {
- const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x);
- const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y);
- const Vec3fa p = patch.eval(lu,lv);
- const float u = float(ix)*rcp_swidth;
- const float v = float(iy)*rcp_sheight;
- const int ofs = (iy-y0)*dwidth+(ix-x0);
- Px[ofs] = p.x;
- Py[ofs] = p.y;
- Pz[ofs] = p.z;
- U[ofs] = u;
- V[ofs] = v;
- }
- }
-#else
- foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) {
- const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x);
- const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y);
- const Vec3vfx p = patch.eval(lu,lv);
- Vec3vfx n = zero;
- if (unlikely(Nx != nullptr)) n = normalize_safe(patch.normal(lu,lv));
- const vfloatx u = vfloatx(ix)*rcp_swidth;
- const vfloatx v = vfloatx(iy)*rcp_sheight;
- const vintx ofs = (iy-y0)*dwidth+(ix-x0);
- if (likely(all(valid)) && all(iy==iy[0])) {
- const unsigned ofs2 = ofs[0];
- vfloatx::storeu(Px+ofs2,p.x);
- vfloatx::storeu(Py+ofs2,p.y);
- vfloatx::storeu(Pz+ofs2,p.z);
- vfloatx::storeu(U+ofs2,u);
- vfloatx::storeu(V+ofs2,v);
- if (unlikely(Nx != nullptr)) {
- vfloatx::storeu(Nx+ofs2,n.x);
- vfloatx::storeu(Ny+ofs2,n.y);
- vfloatx::storeu(Nz+ofs2,n.z);
- }
- } else {
- foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) {
- const unsigned ofs2 = ofs[j]-j;
- vfloatx::storeu(valid,Px+ofs2,p.x);
- vfloatx::storeu(valid,Py+ofs2,p.y);
- vfloatx::storeu(valid,Pz+ofs2,p.z);
- vfloatx::storeu(valid,U+ofs2,u);
- vfloatx::storeu(valid,V+ofs2,v);
- if (unlikely(Nx != nullptr)) {
- vfloatx::storeu(valid,Nx+ofs2,n.x);
- vfloatx::storeu(valid,Ny+ofs2,n.y);
- vfloatx::storeu(valid,Nz+ofs2,n.z);
- }
- });
- }
- });
-#endif
- }
-
- __forceinline bool final(const CatmullClarkPatch3fa& patch, const CatmullClarkRing::Type type, unsigned depth)
- {
- const unsigned max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR;
-//#if PATCH_MIN_RESOLUTION
-// return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=max_eval_depth;
-//#else
- return depth>=max_eval_depth;
-//#endif
- }
-
- void eval(const CatmullClarkPatch3fa& patch, const BBox2f& srange, const BBox2f& erange, const unsigned depth,
- const BezierCurve3fa* border0 = nullptr, const BezierCurve3fa* border1 = nullptr, const BezierCurve3fa* border2 = nullptr, const BezierCurve3fa* border3 = nullptr)
- {
- if (erange.empty())
- return;
-
- int lx0 = (int) ceilf(erange.lower.x);
- int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0));
- int ly0 = (int) ceilf(erange.lower.y);
- int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0));
- if (lx0 >= lx1 || ly0 >= ly1) return;
-
- CatmullClarkPatch::Type ty = patch.type();
-
- if (unlikely(final(patch,ty,depth)))
- {
- if (ty & CatmullClarkRing::TYPE_REGULAR) {
- RegularPatch rpatch(patch,border0,border1,border2,border3);
- evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
- return;
- } else {
- IrregularFillPatch ipatch(patch,border0,border1,border2,border3);
- evalLocalGrid(ipatch,srange,lx0,lx1,ly0,ly1);
- return;
- }
- }
- else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
- assert(depth > 0);
- RegularPatch rpatch(patch,border0,border1,border2,border3);
- evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
- return;
- }
-#if PATCH_USE_GREGORY == 2
- else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
- assert(depth > 0);
- GregoryPatch gpatch(patch,border0,border1,border2,border3);
- evalLocalGrid(gpatch,srange,lx0,lx1,ly0,ly1);
- }
-#endif
- else
- {
- array_t<CatmullClarkPatch3fa,4> patches;
- patch.subdivide(patches);
-
- const Vec2f c = srange.center();
- const BBox2f srange0(srange.lower,c);
- const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
- const BBox2f srange2(c,srange.upper);
- const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
-
- eval(patches[0],srange0,intersect(srange0,erange),depth+1);
- eval(patches[1],srange1,intersect(srange1,erange),depth+1);
- eval(patches[2],srange2,intersect(srange2,erange),depth+1);
- eval(patches[3],srange3,intersect(srange3,erange),depth+1);
- }
- }
- };
-
- template<typename Eval, typename Patch>
- bool stitch_col(const Patch& patch, int subPatch,
- const bool right, const unsigned y0, const unsigned y1, const int fine_y, const int coarse_y,
- float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dx0, const unsigned dwidth, const unsigned dheight)
- {
- assert(coarse_y <= fine_y);
- if (likely(fine_y == coarse_y))
- return false;
-
- const unsigned y0s = stitch(y0,fine_y,coarse_y);
- const unsigned y1s = stitch(y1,fine_y,coarse_y);
- const unsigned M = y1s-y0s+1 + VSIZEX;
-
- dynamic_large_stack_array(float,px,M,64*sizeof(float));
- dynamic_large_stack_array(float,py,M,64*sizeof(float));
- dynamic_large_stack_array(float,pz,M,64*sizeof(float));
- dynamic_large_stack_array(float,u,M,64*sizeof(float));
- dynamic_large_stack_array(float,v,M,64*sizeof(float));
- dynamic_large_stack_array(float,nx,M,64*sizeof(float));
- dynamic_large_stack_array(float,ny,M,64*sizeof(float));
- dynamic_large_stack_array(float,nz,M,64*sizeof(float));
- const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
- Eval(patch,subPatch, right,right, y0s,y1s, 2,coarse_y+1, px,py,pz,u,v,
- has_Nxyz ? (float*)nx : nullptr,has_Nxyz ? (float*)ny : nullptr ,has_Nxyz ? (float*)nz : nullptr, 1,4097);
-
- for (unsigned y=y0; y<=y1; y++)
- {
- const unsigned ys = stitch(y,fine_y,coarse_y)-y0s;
- Px[(y-y0)*dwidth+dx0] = px[ys];
- Py[(y-y0)*dwidth+dx0] = py[ys];
- Pz[(y-y0)*dwidth+dx0] = pz[ys];
- U [(y-y0)*dwidth+dx0] = u[ys];
- V [(y-y0)*dwidth+dx0] = v[ys];
- if (unlikely(has_Nxyz)) {
- Nx[(y-y0)*dwidth+dx0] = nx[ys];
- Ny[(y-y0)*dwidth+dx0] = ny[ys];
- Nz[(y-y0)*dwidth+dx0] = nz[ys];
- }
- }
- return true;
- }
-
- template<typename Eval, typename Patch>
- bool stitch_row(const Patch& patch, int subPatch,
- const bool bottom, const unsigned x0, const unsigned x1, const int fine_x, const int coarse_x,
- float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dy0, const unsigned dwidth, const unsigned dheight)
- {
- assert(coarse_x <= fine_x);
- if (likely(fine_x == coarse_x))
- return false;
-
- const unsigned x0s = stitch(x0,fine_x,coarse_x);
- const unsigned x1s = stitch(x1,fine_x,coarse_x);
- const unsigned M = x1s-x0s+1 + VSIZEX;
-
- dynamic_large_stack_array(float,px,M,32*sizeof(float));
- dynamic_large_stack_array(float,py,M,32*sizeof(float));
- dynamic_large_stack_array(float,pz,M,32*sizeof(float));
- dynamic_large_stack_array(float,u,M,32*sizeof(float));
- dynamic_large_stack_array(float,v,M,32*sizeof(float));
- dynamic_large_stack_array(float,nx,M,32*sizeof(float));
- dynamic_large_stack_array(float,ny,M,32*sizeof(float));
- dynamic_large_stack_array(float,nz,M,32*sizeof(float));
- const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
- Eval(patch,subPatch, x0s,x1s, bottom,bottom, coarse_x+1,2, px,py,pz,u,v,
- has_Nxyz ? (float*)nx :nullptr, has_Nxyz ? (float*)ny : nullptr , has_Nxyz ? (float*)nz : nullptr, 4097,1);
-
- for (unsigned x=x0; x<=x1; x++)
- {
- const unsigned xs = stitch(x,fine_x,coarse_x)-x0s;
- Px[dy0*dwidth+x-x0] = px[xs];
- Py[dy0*dwidth+x-x0] = py[xs];
- Pz[dy0*dwidth+x-x0] = pz[xs];
- U [dy0*dwidth+x-x0] = u[xs];
- V [dy0*dwidth+x-x0] = v[xs];
- if (unlikely(has_Nxyz)) {
- Nx[dy0*dwidth+x-x0] = nx[xs];
- Ny[dy0*dwidth+x-x0] = ny[xs];
- Nz[dy0*dwidth+x-x0] = nz[xs];
- }
- }
- return true;
- }
-
- template<typename Eval, typename Patch>
- void feature_adaptive_eval_grid (const Patch& patch, unsigned subPatch, const float levels[4],
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dwidth, const unsigned dheight)
- {
- bool sl = false, sr = false, st = false, sb = false;
- if (levels) {
- sl = x0 == 0 && stitch_col<Eval,Patch>(patch,subPatch,0,y0,y1,sheight-1,int(levels[3]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
- sr = x1 == swidth-1 && stitch_col<Eval,Patch>(patch,subPatch,1,y0,y1,sheight-1,int(levels[1]), Px,Py,Pz,U,V,Nx,Ny,Nz, x1-x0,dwidth,dheight);
- st = y0 == 0 && stitch_row<Eval,Patch>(patch,subPatch,0,x0,x1,swidth-1,int(levels[0]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
- sb = y1 == sheight-1 && stitch_row<Eval,Patch>(patch,subPatch,1,x0,x1,swidth-1,int(levels[2]), Px,Py,Pz,U,V,Nx,Ny,Nz, y1-y0,dwidth,dheight);
- }
- const unsigned ofs = st*dwidth+sl;
- Eval(patch,subPatch,x0+sl,x1-sr,y0+st,y1-sb, swidth,sheight, Px+ofs,Py+ofs,Pz+ofs,U+ofs,V+ofs,Nx?Nx+ofs:nullptr,Ny?Ny+ofs:nullptr,Nz?Nz+ofs:nullptr, dwidth,dheight);
- }
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h b/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h
deleted file mode 100644
index fa3216730f..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h
+++ /dev/null
@@ -1,186 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename vbool, typename vint, typename vfloat, typename Vertex, typename Vertex_t = Vertex>
- struct FeatureAdaptiveEvalSimd
- {
- public:
-
- typedef PatchT<Vertex,Vertex_t> Patch;
- typedef typename Patch::Ref Ref;
- typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
- typedef BSplinePatchT<Vertex,Vertex_t> BSplinePatch;
- typedef BezierPatchT<Vertex,Vertex_t> BezierPatch;
- typedef GregoryPatchT<Vertex,Vertex_t> GregoryPatch;
- typedef BilinearPatchT<Vertex,Vertex_t> BilinearPatch;
- typedef BezierCurveT<Vertex> BezierCurve;
-
- FeatureAdaptiveEvalSimd (const HalfEdge* edge, const char* vertices, size_t stride, const vbool& valid, const vfloat& u, const vfloat& v,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
- {
- switch (edge->patch_type) {
- case HalfEdge::BILINEAR_PATCH: BilinearPatch(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
- case HalfEdge::REGULAR_QUAD_PATCH: RegularPatchT(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
-#if PATCH_USE_GREGORY == 2
- case HalfEdge::IRREGULAR_QUAD_PATCH: GregoryPatchT<Vertex,Vertex_t>(edge,vertices,stride).eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,1.0f,dstride,N); break;
-#endif
- default: {
- GeneralCatmullClarkPatch patch(edge,vertices,stride);
- eval_direct(valid,patch,Vec2<vfloat>(u,v),0);
- break;
- }
- }
- }
-
- FeatureAdaptiveEvalSimd (const CatmullClarkPatch& patch, const vbool& valid, const vfloat& u, const vfloat& v, float dscale, size_t depth,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
- {
- eval_direct(valid,patch,Vec2<vfloat>(u,v),dscale,depth);
- }
-
- template<size_t N>
- __forceinline void eval_quad_direct(const vbool& valid, array_t<CatmullClarkPatch,N>& patches, const Vec2<vfloat>& uv, float dscale, size_t depth)
- {
- const vfloat u = uv.x, v = uv.y;
- const vbool u0_mask = u < 0.5f, u1_mask = u >= 0.5f;
- const vbool v0_mask = v < 0.5f, v1_mask = v >= 0.5f;
- const vbool u0v0_mask = valid & u0_mask & v0_mask;
- const vbool u0v1_mask = valid & u0_mask & v1_mask;
- const vbool u1v0_mask = valid & u1_mask & v0_mask;
- const vbool u1v1_mask = valid & u1_mask & v1_mask;
- if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1);
- if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1);
- if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1);
- if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1);
- }
-
- template<size_t N>
- __forceinline void eval_general_quad_direct(const vbool& valid, const GeneralCatmullClarkPatch& patch, array_t<CatmullClarkPatch,N>& patches, const Vec2<vfloat>& uv, float dscale, size_t depth)
- {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[GeneralCatmullClarkPatch::SIZE]; patch.getLimitBorder(borders);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border1l,border1r; borders[1].subdivide(border1l,border1r);
- BezierCurve border2l,border2r; borders[2].subdivide(border2l,border2r);
- BezierCurve border3l,border3r; borders[3].subdivide(border3l,border3r);
-#endif
- GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
- const vfloat u = uv.x, v = uv.y;
- const vbool u0_mask = u < 0.5f, u1_mask = u >= 0.5f;
- const vbool v0_mask = v < 0.5f, v1_mask = v >= 0.5f;
- const vbool u0v0_mask = valid & u0_mask & v0_mask;
- const vbool u0v1_mask = valid & u0_mask & v1_mask;
- const vbool u1v0_mask = valid & u1_mask & v0_mask;
- const vbool u1v1_mask = valid & u1_mask & v1_mask;
-#if PATCH_USE_GREGORY == 2
- if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1,&border0l,nullptr,nullptr,&border3r);
- if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1,&border0r,&border1l,nullptr,nullptr);
- if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1,nullptr,&border1r,&border2l,nullptr);
- if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1,nullptr,nullptr,&border2r,&border3l);
-#else
- if (any(u0v0_mask)) eval_direct(u0v0_mask,patches[0],Vec2<vfloat>(2.0f*u,2.0f*v),2.0f*dscale,depth+1);
- if (any(u1v0_mask)) eval_direct(u1v0_mask,patches[1],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v),2.0f*dscale,depth+1);
- if (any(u1v1_mask)) eval_direct(u1v1_mask,patches[2],Vec2<vfloat>(2.0f*u-1.0f,2.0f*v-1.0f),2.0f*dscale,depth+1);
- if (any(u0v1_mask)) eval_direct(u0v1_mask,patches[3],Vec2<vfloat>(2.0f*u,2.0f*v-1.0f),2.0f*dscale,depth+1);
-#endif
- }
-
- __forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
- {
- const size_t max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR;
-//#if PATCH_MIN_RESOLUTION
-// return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=max_eval_depth;
-//#else
- return depth>=max_eval_depth;
-//#endif
- }
-
- void eval_direct(const vbool& valid, const CatmullClarkPatch& patch, const Vec2<vfloat>& uv, float dscale, size_t depth,
- BezierCurve* border0 = nullptr, BezierCurve* border1 = nullptr, BezierCurve* border2 = nullptr, BezierCurve* border3 = nullptr)
- {
- typename CatmullClarkPatch::Type ty = patch.type();
-
- if (unlikely(final(patch,ty,depth)))
- {
- if (ty & CatmullClarkRing::TYPE_REGULAR) {
- RegularPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- } else {
- IrregularFillPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- }
- }
- else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
- assert(depth > 0); RegularPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- }
-#if PATCH_USE_GREGORY == 2
- else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
- assert(depth > 0); GregoryPatch(patch,border0,border1,border2,border3).eval(valid,uv.x,uv.y,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- }
-#endif
- else
- {
- array_t<CatmullClarkPatch,4> patches;
- patch.subdivide(patches); // FIXME: only have to generate one of the patches
- eval_quad_direct(valid,patches,uv,dscale,depth);
- }
- }
-
- void eval_direct(const vbool& valid, const GeneralCatmullClarkPatch& patch, const Vec2<vfloat>& uv, const size_t depth)
- {
- /* convert into standard quad patch if possible */
- if (likely(patch.isQuadPatch())) {
- CatmullClarkPatch qpatch; patch.init(qpatch);
- return eval_direct(valid,qpatch,uv,1.0f,depth);
- }
-
- /* subdivide patch */
- unsigned Nc;
- array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
- patch.subdivide(patches,Nc); // FIXME: only have to generate one of the patches
-
- /* parametrization for quads */
- if (Nc == 4)
- eval_general_quad_direct(valid,patch,patches,uv,1.0f,depth);
-
- /* parametrization for arbitrary polygons */
- else
- {
- const vint l = (vint)floor(0.5f*uv.x); const vfloat u = 2.0f*frac(0.5f*uv.x)-0.5f;
- const vint h = (vint)floor(0.5f*uv.y); const vfloat v = 2.0f*frac(0.5f*uv.y)-0.5f;
- const vint i = (h<<2)+l; assert(all(valid,i<Nc));
- foreach_unique(valid,i,[&](const vbool& valid, const int i) {
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[2]; patch.getLimitBorder(borders,i);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[1].subdivide(border2l,border2r);
- eval_direct(valid,patches[i],Vec2<vfloat>(u,v),1.0f,depth+1, &border0l, nullptr, nullptr, &border2r);
-#else
- eval_direct(valid,patches[i],Vec2<vfloat>(u,v),1.0f,depth+1);
-#endif
- });
- }
- }
-
- private:
- float* const P;
- float* const dPdu;
- float* const dPdv;
- float* const ddPdudu;
- float* const ddPdvdv;
- float* const ddPdudv;
- const size_t dstride;
- const size_t N;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h
deleted file mode 100644
index 2a7c4b1f2c..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h
+++ /dev/null
@@ -1,893 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_patch.h"
-#include "bezier_patch.h"
-#include "bezier_curve.h"
-#include "catmullclark_coefficients.h"
-
-namespace embree
-{
- template<typename Vertex, typename Vertex_t = Vertex>
- class __aligned(64) GregoryPatchT
- {
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
- typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
- typedef BezierCurveT<Vertex> BezierCurve;
-
- public:
- Vertex v[4][4];
- Vertex f[2][2];
-
- __forceinline GregoryPatchT() {}
-
- __forceinline GregoryPatchT(const CatmullClarkPatch& patch) {
- init(patch);
- }
-
- __forceinline GregoryPatchT(const CatmullClarkPatch& patch,
- const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
- {
- init_crackfix(patch,border0,border1,border2,border3);
- }
-
- __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
- init(CatmullClarkPatch(edge,vertices,stride));
- }
-
- __forceinline Vertex& p0() { return v[0][0]; }
- __forceinline Vertex& p1() { return v[0][3]; }
- __forceinline Vertex& p2() { return v[3][3]; }
- __forceinline Vertex& p3() { return v[3][0]; }
-
- __forceinline Vertex& e0_p() { return v[0][1]; }
- __forceinline Vertex& e0_m() { return v[1][0]; }
- __forceinline Vertex& e1_p() { return v[1][3]; }
- __forceinline Vertex& e1_m() { return v[0][2]; }
- __forceinline Vertex& e2_p() { return v[3][2]; }
- __forceinline Vertex& e2_m() { return v[2][3]; }
- __forceinline Vertex& e3_p() { return v[2][0]; }
- __forceinline Vertex& e3_m() { return v[3][1]; }
-
- __forceinline Vertex& f0_p() { return v[1][1]; }
- __forceinline Vertex& f1_p() { return v[1][2]; }
- __forceinline Vertex& f2_p() { return v[2][2]; }
- __forceinline Vertex& f3_p() { return v[2][1]; }
- __forceinline Vertex& f0_m() { return f[0][0]; }
- __forceinline Vertex& f1_m() { return f[0][1]; }
- __forceinline Vertex& f2_m() { return f[1][1]; }
- __forceinline Vertex& f3_m() { return f[1][0]; }
-
- __forceinline const Vertex& p0() const { return v[0][0]; }
- __forceinline const Vertex& p1() const { return v[0][3]; }
- __forceinline const Vertex& p2() const { return v[3][3]; }
- __forceinline const Vertex& p3() const { return v[3][0]; }
-
- __forceinline const Vertex& e0_p() const { return v[0][1]; }
- __forceinline const Vertex& e0_m() const { return v[1][0]; }
- __forceinline const Vertex& e1_p() const { return v[1][3]; }
- __forceinline const Vertex& e1_m() const { return v[0][2]; }
- __forceinline const Vertex& e2_p() const { return v[3][2]; }
- __forceinline const Vertex& e2_m() const { return v[2][3]; }
- __forceinline const Vertex& e3_p() const { return v[2][0]; }
- __forceinline const Vertex& e3_m() const { return v[3][1]; }
-
- __forceinline const Vertex& f0_p() const { return v[1][1]; }
- __forceinline const Vertex& f1_p() const { return v[1][2]; }
- __forceinline const Vertex& f2_p() const { return v[2][2]; }
- __forceinline const Vertex& f3_p() const { return v[2][1]; }
- __forceinline const Vertex& f0_m() const { return f[0][0]; }
- __forceinline const Vertex& f1_m() const { return f[0][1]; }
- __forceinline const Vertex& f2_m() const { return f[1][1]; }
- __forceinline const Vertex& f3_m() const { return f[1][0]; }
-
- __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) {
- return irreg_patch.ring[index].getLimitVertex();
- }
-
- __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
- return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx);
- }
-
- __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
- return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx);
- }
-
- __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
- {
- CatmullClark1Ring3fa r0,r1,r2;
- irreg_patch.ring[index].subdivide(r0);
- r0.subdivide(r1);
- r1.subdivide(r2);
- return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx);
- }
-
- __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
- {
- CatmullClark1Ring3fa r0,r1,r2;
- irreg_patch.ring[index].subdivide(r0);
- r0.subdivide(r1);
- r1.subdivide(r2);
- return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx);
- }
-
- void initFaceVertex(const CatmullClarkPatch& irreg_patch,
- const size_t index,
- const Vertex& p_vtx,
- const Vertex& e0_p_vtx,
- const Vertex& e1_m_vtx,
- const unsigned int face_valence_p1,
- const Vertex& e0_m_vtx,
- const Vertex& e3_p_vtx,
- const unsigned int face_valence_p3,
- Vertex& f_p_vtx,
- Vertex& f_m_vtx)
- {
- const unsigned int face_valence = irreg_patch.ring[index].face_valence;
- const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
- const unsigned int border_index = irreg_patch.ring[index].border_index;
-
- const Vertex& vtx = irreg_patch.ring[index].vtx;
- const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0);
- const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0);
- const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1);
-
- Vertex c_i, e_i_p_1;
- const bool hasHardEdge0 =
- std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
- std::isinf(irreg_patch.ring[index].crease_weight[0]);
-
- if (unlikely((border_index == edge_valence-2) || hasHardEdge0))
- {
- /* mirror quad center and edge mid-point */
- c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1);
- e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1);
- }
- else
- {
- c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 );
- e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 );
- }
-
- Vertex c_i_m_2, e_i_m_2;
- const bool hasHardEdge1 =
- std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
- std::isinf(irreg_patch.ring[index].crease_weight[1]);
-
- if (unlikely(border_index == 2 || hasHardEdge1))
- {
- /* mirror quad center and edge mid-point */
- c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1);
- e_i_m_2 = madd(2.0f, vtx - e_i, + e_i);
- }
- else
- {
- c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 );
- e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 );
- }
-
- const float d = 3.0f;
- //const float c = cosf(2.0f*M_PI/(float)face_valence);
- //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1);
- //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3);
-
- const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
- const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
- const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
-
- const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i);
- const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2);
-
- f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
- f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
- }
-
- __noinline void init(const CatmullClarkPatch& patch)
- {
- assert( patch.ring[0].hasValidPositions() );
- assert( patch.ring[1].hasValidPositions() );
- assert( patch.ring[2].hasValidPositions() );
- assert( patch.ring[3].hasValidPositions() );
-
- p0() = initCornerVertex(patch,0);
- p1() = initCornerVertex(patch,1);
- p2() = initCornerVertex(patch,2);
- p3() = initCornerVertex(patch,3);
-
- e0_p() = initPositiveEdgeVertex(patch,0, p0());
- e1_p() = initPositiveEdgeVertex(patch,1, p1());
- e2_p() = initPositiveEdgeVertex(patch,2, p2());
- e3_p() = initPositiveEdgeVertex(patch,3, p3());
-
- e0_m() = initNegativeEdgeVertex(patch,0, p0());
- e1_m() = initNegativeEdgeVertex(patch,1, p1());
- e2_m() = initNegativeEdgeVertex(patch,2, p2());
- e3_m() = initNegativeEdgeVertex(patch,3, p3());
-
- const unsigned int face_valence_p0 = patch.ring[0].face_valence;
- const unsigned int face_valence_p1 = patch.ring[1].face_valence;
- const unsigned int face_valence_p2 = patch.ring[2].face_valence;
- const unsigned int face_valence_p3 = patch.ring[3].face_valence;
-
- initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
- initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
- initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
- initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
-
- }
-
- __noinline void init_crackfix(const CatmullClarkPatch& patch,
- const BezierCurve* border0,
- const BezierCurve* border1,
- const BezierCurve* border2,
- const BezierCurve* border3)
- {
- assert( patch.ring[0].hasValidPositions() );
- assert( patch.ring[1].hasValidPositions() );
- assert( patch.ring[2].hasValidPositions() );
- assert( patch.ring[3].hasValidPositions() );
-
- p0() = initCornerVertex(patch,0);
- p1() = initCornerVertex(patch,1);
- p2() = initCornerVertex(patch,2);
- p3() = initCornerVertex(patch,3);
-
- e0_p() = initPositiveEdgeVertex(patch,0, p0());
- e1_p() = initPositiveEdgeVertex(patch,1, p1());
- e2_p() = initPositiveEdgeVertex(patch,2, p2());
- e3_p() = initPositiveEdgeVertex(patch,3, p3());
-
- e0_m() = initNegativeEdgeVertex(patch,0, p0());
- e1_m() = initNegativeEdgeVertex(patch,1, p1());
- e2_m() = initNegativeEdgeVertex(patch,2, p2());
- e3_m() = initNegativeEdgeVertex(patch,3, p3());
-
- if (unlikely(border0 != nullptr))
- {
- p0() = border0->v0;
- e0_p() = border0->v1;
- e1_m() = border0->v2;
- p1() = border0->v3;
- }
-
- if (unlikely(border1 != nullptr))
- {
- p1() = border1->v0;
- e1_p() = border1->v1;
- e2_m() = border1->v2;
- p2() = border1->v3;
- }
-
- if (unlikely(border2 != nullptr))
- {
- p2() = border2->v0;
- e2_p() = border2->v1;
- e3_m() = border2->v2;
- p3() = border2->v3;
- }
-
- if (unlikely(border3 != nullptr))
- {
- p3() = border3->v0;
- e3_p() = border3->v1;
- e0_m() = border3->v2;
- p0() = border3->v3;
- }
-
- const unsigned int face_valence_p0 = patch.ring[0].face_valence;
- const unsigned int face_valence_p1 = patch.ring[1].face_valence;
- const unsigned int face_valence_p2 = patch.ring[2].face_valence;
- const unsigned int face_valence_p3 = patch.ring[3].face_valence;
-
- initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
- initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
- initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
- initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
- }
-
-
- void computeGregoryPatchFacePoints(const unsigned int face_valence,
- const Vertex& r_e_p,
- const Vertex& r_e_m,
- const Vertex& p_vtx,
- const Vertex& e0_p_vtx,
- const Vertex& e1_m_vtx,
- const unsigned int face_valence_p1,
- const Vertex& e0_m_vtx,
- const Vertex& e3_p_vtx,
- const unsigned int face_valence_p3,
- Vertex& f_p_vtx,
- Vertex& f_m_vtx,
- const float d = 3.0f)
- {
- //const float c = cosf(2.0*M_PI/(float)face_valence);
- //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1);
- //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3);
-
- const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
- const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
- const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
-
-
- f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
- f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
- f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
- f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
- }
-
- __noinline void init(const GeneralCatmullClarkPatch& patch)
- {
- assert(patch.size() == 4);
-#if 0
- CatmullClarkPatch qpatch; patch.init(qpatch);
- init(qpatch);
-#else
- const float face_valence_p0 = patch.ring[0].face_valence;
- const float face_valence_p1 = patch.ring[1].face_valence;
- const float face_valence_p2 = patch.ring[2].face_valence;
- const float face_valence_p3 = patch.ring[3].face_valence;
-
- Vertex p0_r_p, p0_r_m;
- patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );
-
- Vertex p1_r_p, p1_r_m;
- patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );
-
- Vertex p2_r_p, p2_r_m;
- patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );
-
- Vertex p3_r_p, p3_r_m;
- patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );
-
- computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
- computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
- computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
- computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() );
-
-#endif
- }
-
-
- __forceinline void convert_to_bezier()
- {
- f0_p() = (f0_p() + f0_m()) * 0.5f;
- f1_p() = (f1_p() + f1_m()) * 0.5f;
- f2_p() = (f2_p() + f2_m()) * 0.5f;
- f3_p() = (f3_p() + f3_m()) * 0.5f;
- f0_m() = Vertex( zero );
- f1_m() = Vertex( zero );
- f2_m() = Vertex( zero );
- f3_m() = Vertex( zero );
- }
-
- static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv,
- Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
- {
- if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f))
- {
- matrix_11 = matrix[1][1];
- matrix_12 = matrix[1][2];
- matrix_22 = matrix[2][2];
- matrix_21 = matrix[2][1];
- }
- else
- {
- const Vertex_t f0_p = matrix[1][1];
- const Vertex_t f1_p = matrix[1][2];
- const Vertex_t f2_p = matrix[2][2];
- const Vertex_t f3_p = matrix[2][1];
-
- const Vertex_t f0_m = f_m[0][0];
- const Vertex_t f1_m = f_m[0][1];
- const Vertex_t f2_m = f_m[1][1];
- const Vertex_t f3_m = f_m[1][0];
-
- matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(uu+vv);
- matrix_12 = ((1.0f-uu) * f1_m + vv * f1_p)*rcp(1.0f-uu+vv);
- matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv);
- matrix_21 = ( uu * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv);
- }
- }
-
- template<typename vfloat>
- static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2],
- size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
- {
- const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
-
- const vfloat f0_p = v[1][1][i];
- const vfloat f1_p = v[1][2][i];
- const vfloat f2_p = v[2][2][i];
- const vfloat f3_p = v[2][1][i];
-
- const vfloat f0_m = f[0][0][i];
- const vfloat f1_m = f[0][1][i];
- const vfloat f2_m = f[1][1][i];
- const vfloat f3_m = f[1][0][i];
-
- const vfloat one_minus_uu = vfloat(1.0f) - uu;
- const vfloat one_minus_vv = vfloat(1.0f) - vv;
-
- const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
- const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
- const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
- const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
-
- matrix_11 = select(m_border,f0_p,f0_i);
- matrix_12 = select(m_border,f1_p,f1_i);
- matrix_22 = select(m_border,f2_p,f2_i);
- matrix_21 = select(m_border,f3_p,f3_i);
- }
-
- static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv)
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::eval(uu);
- const Vec4<float> Bv = BezierBasis::eval(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::derivative(uu);
- const Vec4<float> Bv = BezierBasis::eval(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::eval(uu);
- const Vec4<float> Bv = BezierBasis::derivative(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::derivative2(uu);
- const Vec4<float> Bv = BezierBasis::eval(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::eval(uu);
- const Vec4<float> Bv = BezierBasis::derivative2(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
- {
- Vertex_t v_11, v_12, v_22, v_21;
- computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
-
- const Vec4<float> Bu = BezierBasis::derivative(uu);
- const Vec4<float> Bv = BezierBasis::derivative(vv);
-
- return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
- madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
- madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
- Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
- }
-
- __forceinline Vertex eval(const float uu, const float vv) const {
- return eval(v,f,uu,vv);
- }
-
- __forceinline Vertex eval_du( const float uu, const float vv) const {
- return eval_du(v,f,uu,vv);
- }
-
- __forceinline Vertex eval_dv( const float uu, const float vv) const {
- return eval_dv(v,f,uu,vv);
- }
-
- __forceinline Vertex eval_dudu( const float uu, const float vv) const {
- return eval_dudu(v,f,uu,vv);
- }
-
- __forceinline Vertex eval_dvdv( const float uu, const float vv) const {
- return eval_dvdv(v,f,uu,vv);
- }
-
- __forceinline Vertex eval_dudv( const float uu, const float vv) const {
- return eval_dudv(v,f,uu,vv);
- }
-
- static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions
- {
- /* interpolate inner vertices */
- Vertex_t matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21);
-
- /* tangentU */
- const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]);
- const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[3][1]);
- const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[3][2]);
- const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]);
-
- const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
-
- /* tangentV */
- const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]);
- const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[1][3]);
- const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[2][3]);
- const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]);
-
- const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
-
- /* normal = tangentU x tangentV */
- const Vertex_t n = cross(tangentU,tangentV);
-
- return n;
- }
-
- __forceinline Vertex normal( const float uu, const float vv) const {
- return normal(v,f,uu,vv);
- }
-
- __forceinline void eval(const float u, const float v,
- Vertex* P, Vertex* dPdu, Vertex* dPdv,
- Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
- const float dscale = 1.0f) const
- {
- if (P) {
- *P = eval(u,v);
- }
- if (dPdu) {
- assert(dPdu); *dPdu = eval_du(u,v)*dscale;
- assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
- }
- if (ddPdudu) {
- assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
- assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
- assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
- }
- }
-
- template<class vfloat>
- static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2],
- const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
- vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
- {
- const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
- const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i]))));
- const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i]))));
- const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
- return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
- }
-
- template<typename vbool, typename vfloat>
- static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2],
- const vbool& valid, const vfloat& uu, const vfloat& vv,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
- const float dscale, const size_t dstride, const size_t N)
- {
- if (P) {
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
- }
- }
- if (dPdu)
- {
- {
- assert(dPdu);
- const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
- }
- }
- {
- assert(dPdv);
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
- }
- }
- }
- if (ddPdudu)
- {
- {
- assert(ddPdudu);
- const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
- const Vec4<vfloat> v_n = BezierBasis::eval(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
- }
- }
- {
- assert(ddPdvdv);
- const Vec4<vfloat> u_n = BezierBasis::eval(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
- }
- }
- {
- assert(ddPdudv);
- const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
- const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
- for (size_t i=0; i<N; i++) {
- vfloat matrix_11, matrix_12, matrix_22, matrix_21;
- computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
- vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
- }
- }
- }
- }
-
- template<typename vbool, typename vfloat>
- __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
- const float dscale, const size_t dstride, const size_t N) const {
- eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- }
-
- template<class T>
- static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
- {
- typedef typename T::Bool M;
- const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
-
- const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
- const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
- const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
- const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
-
- const Vec3<T> f0_m = f[0][0];
- const Vec3<T> f1_m = f[0][1];
- const Vec3<T> f2_m = f[1][1];
- const Vec3<T> f3_m = f[1][0];
-
- const T one_minus_uu = T(1.0f) - uu;
- const T one_minus_vv = T(1.0f) - vv;
-
- const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
- const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
- const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
- const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
-
- const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
- const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
- const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
- const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
-
- const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
- const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
- const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
- const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
- const T B2_u = 3.0f * (uu * one_minus_uu * uu);
- const T B2_v = 3.0f * (vv * one_minus_vv * vv);
- const T B3_u = uu * uu * uu;
- const T B3_v = vv * vv * vv;
-
- const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))),
- madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))),
- madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))),
- B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x))))));
-
- const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))),
- madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))),
- madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))),
- B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y))))));
-
- const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))),
- madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))),
- madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))),
- B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z))))));
-
- return Vec3<T>(x,y,z);
- }
-
- template<class T>
- __forceinline Vec3<T> eval(const T& uu, const T& vv) const
- {
- Vec3<T> ff[2][2];
- ff[0][0] = Vec3<T>(f[0][0]);
- ff[0][1] = Vec3<T>(f[0][1]);
- ff[1][1] = Vec3<T>(f[1][1]);
- ff[1][0] = Vec3<T>(f[1][0]);
- return eval_t(v,ff,uu,vv);
- }
-
- template<class T>
- static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
- {
- typedef typename T::Bool M;
-
- const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
- const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
- const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
- const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
-
- const Vec3<T> f0_m = f[0][0];
- const Vec3<T> f1_m = f[0][1];
- const Vec3<T> f2_m = f[1][1];
- const Vec3<T> f3_m = f[1][0];
-
- const T one_minus_uu = T(1.0f) - uu;
- const T one_minus_vv = T(1.0f) - vv;
-
- const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
- const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
- const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
- const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
-
-#if 1
- const M m_corner0 = (uu == 0.0f) & (vv == 0.0f);
- const M m_corner1 = (uu == 1.0f) & (vv == 0.0f);
- const M m_corner2 = (uu == 1.0f) & (vv == 1.0f);
- const M m_corner3 = (uu == 0.0f) & (vv == 1.0f);
- const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
- const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
- const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
- const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
-#else
- const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
- const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
- const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
- const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
- const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
-#endif
-
- const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
- const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
- const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
- const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
-
- const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
- const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
- const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
-
- const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
- const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
- const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
-
- const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
- const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
-
- /* tangentU */
- const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
- const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
- const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
- const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
-
- const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
-
- /* tangentV */
- const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
- const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
- const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
- const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
-
- const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
-
- /* normal = tangentU x tangentV */
- const Vec3<T> n = cross(tangentU,tangentV);
- return n;
- }
-
- template<class T>
- __forceinline Vec3<T> normal(const T& uu, const T& vv) const
- {
- Vec3<T> ff[2][2];
- ff[0][0] = Vec3<T>(f[0][0]);
- ff[0][1] = Vec3<T>(f[0][1]);
- ff[1][1] = Vec3<T>(f[1][1]);
- ff[1][0] = Vec3<T>(f[1][0]);
- return normal_t(v,ff,uu,vv);
- }
-
- __forceinline BBox<Vertex> bounds() const
- {
- const Vertex *const cv = &v[0][0];
- BBox<Vertex> bounds (cv[0]);
- for (size_t i=1; i<16; i++)
- bounds.extend( cv[i] );
- bounds.extend(f[0][0]);
- bounds.extend(f[1][0]);
- bounds.extend(f[1][1]);
- bounds.extend(f[1][1]);
- return bounds;
- }
-
- friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p)
- {
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
-
- for (size_t y=0; y<2; y++)
- for (size_t x=0; x<2; x++)
- o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl;
- return o;
- }
- };
-
- typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
-
- template<typename Vertex, typename Vertex_t>
- __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
- {
- CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
- GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
- gpatch.convert_to_bezier();
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- matrix[y][x] = (Vertex_t)gpatch.v[y][x];
- }
-
- template<typename Vertex, typename Vertex_t>
- __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch)
- {
- GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
- gpatch.convert_to_bezier();
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- matrix[y][x] = (Vertex_t)gpatch.v[y][x];
- }
-
- template<typename Vertex, typename Vertex_t>
- __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
- const BezierCurveT<Vertex>* border0,
- const BezierCurveT<Vertex>* border1,
- const BezierCurveT<Vertex>* border2,
- const BezierCurveT<Vertex>* border3)
- {
- GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3);
- gpatch.convert_to_bezier();
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- matrix[y][x] = (Vertex_t)gpatch.v[y][x];
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h b/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h
deleted file mode 100644
index 85effd02cf..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "gregory_patch.h"
-
-namespace embree
-{
- class __aligned(64) DenseGregoryPatch3fa
- {
- typedef Vec3fa Vec3fa_4x4[4][4];
- public:
-
- __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch)
- {
- for (size_t y=0; y<4; y++)
- for (size_t x=0; x<4; x++)
- matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f);
-
- matrix[0][0].w = patch.f[0][0].x;
- matrix[0][1].w = patch.f[0][0].y;
- matrix[0][2].w = patch.f[0][0].z;
- matrix[0][3].w = 0.0f;
-
- matrix[1][0].w = patch.f[0][1].x;
- matrix[1][1].w = patch.f[0][1].y;
- matrix[1][2].w = patch.f[0][1].z;
- matrix[1][3].w = 0.0f;
-
- matrix[2][0].w = patch.f[1][1].x;
- matrix[2][1].w = patch.f[1][1].y;
- matrix[2][2].w = patch.f[1][1].z;
- matrix[2][3].w = 0.0f;
-
- matrix[3][0].w = patch.f[1][0].x;
- matrix[3][1].w = patch.f[1][0].y;
- matrix[3][2].w = patch.f[1][0].z;
- matrix[3][3].w = 0.0f;
- }
-
- __forceinline void extract_f_m(Vec3fa f_m[2][2]) const
- {
- f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
- f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
- f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
- f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
- }
-
- __forceinline Vec3fa eval(const float uu, const float vv) const
- {
- __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
- return GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
- }
-
- __forceinline Vec3fa normal(const float uu, const float vv) const
- {
- __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
- return GregoryPatch3fa::normal(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
- }
-
- template<class T>
- __forceinline Vec3<T> eval(const T &uu, const T &vv) const
- {
- Vec3<T> f_m[2][2];
- f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
- f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
- f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
- f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
- return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
- }
-
- template<class T>
- __forceinline Vec3<T> normal(const T &uu, const T &vv) const
- {
- Vec3<T> f_m[2][2];
- f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
- f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
- f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
- f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
- return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
- }
-
- __forceinline void eval(const float u, const float v,
- Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv,
- const float dscale = 1.0f) const
- {
- __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
- if (P) {
- *P = GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,u,v);
- }
- if (dPdu) {
- assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
- assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
- }
- if (ddPdudu) {
- assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
- assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
- assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
- }
- }
-
- template<typename vbool, typename vfloat>
- __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const
- {
- __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
- GregoryPatch3fa::eval(matrix,f_m,valid,uu,vv,P,dPdu,dPdv,dscale,dstride,N);
- }
-
- private:
- Vec3ff matrix[4][4]; // f_p/m points are stored in 4th component
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gridrange.h b/thirdparty/embree-aarch64/kernels/subdiv/gridrange.h
deleted file mode 100644
index 4fd741c879..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/gridrange.h
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-
-namespace embree
-{
- struct __aligned(16) GridRange
- {
- unsigned int u_start;
- unsigned int u_end;
- unsigned int v_start;
- unsigned int v_end;
-
- __forceinline GridRange() {}
-
- __forceinline GridRange(unsigned int u_start, unsigned int u_end, unsigned int v_start, unsigned int v_end)
- : u_start(u_start), u_end(u_end), v_start(v_start), v_end(v_end) {}
-
- __forceinline unsigned int width() const {
- return u_end-u_start+1;
- }
-
- __forceinline unsigned int height() const {
- return v_end-v_start+1;
- }
-
- __forceinline bool hasLeafSize() const
- {
- const unsigned int u_size = u_end-u_start+1;
- const unsigned int v_size = v_end-v_start+1;
- assert(u_size >= 1);
- assert(v_size >= 1);
- return u_size <= 3 && v_size <= 3;
- }
-
- static __forceinline unsigned int split(unsigned int start,unsigned int end)
- {
- const unsigned int center = (start+end)/2;
- assert (center > start);
- assert (center < end);
- return center;
- }
-
- __forceinline void split(GridRange& r0, GridRange& r1) const
- {
- assert( hasLeafSize() == false );
- const unsigned int u_size = u_end-u_start+1;
- const unsigned int v_size = v_end-v_start+1;
- r0 = *this;
- r1 = *this;
-
- if (u_size >= v_size)
- {
- const unsigned int u_mid = split(u_start,u_end);
- r0.u_end = u_mid;
- r1.u_start = u_mid;
- }
- else
- {
- const unsigned int v_mid = split(v_start,v_end);
- r0.v_end = v_mid;
- r1.v_start = v_mid;
- }
- }
-
- __forceinline unsigned int splitIntoSubRanges(GridRange r[4]) const
- {
- assert( !hasLeafSize() );
- unsigned int children = 0;
- GridRange first,second;
- split(first,second);
-
- if (first.hasLeafSize()) {
- r[0] = first;
- children++;
- }
- else {
- first.split(r[0],r[1]);
- children += 2;
- }
-
- if (second.hasLeafSize()) {
- r[children] = second;
- children++;
- }
- else {
- second.split(r[children+0],r[children+1]);
- children += 2;
- }
- return children;
- }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/half_edge.h b/thirdparty/embree-aarch64/kernels/subdiv/half_edge.h
deleted file mode 100644
index fb350ca71f..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/half_edge.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_coefficients.h"
-
-namespace embree
-{
- class __aligned(32) HalfEdge
- {
- friend class SubdivMesh;
- public:
-
- enum PatchType : char {
- BILINEAR_PATCH = 0, //!< a bilinear patch
- REGULAR_QUAD_PATCH = 1, //!< a regular quad patch can be represented as a B-Spline
- IRREGULAR_QUAD_PATCH = 2, //!< an irregular quad patch can be represented as a Gregory patch
- COMPLEX_PATCH = 3 //!< these patches need subdivision and cannot be processed by the above fast code paths
- };
-
- enum VertexType : char {
- REGULAR_VERTEX = 0, //!< regular vertex
- NON_MANIFOLD_EDGE_VERTEX = 1, //!< vertex of a non-manifold edge
- };
-
- __forceinline friend PatchType max( const PatchType& ty0, const PatchType& ty1) {
- return (PatchType) max((int)ty0,(int)ty1);
- }
-
- struct Edge
- {
- /*! edge constructor */
- __forceinline Edge(const uint32_t v0, const uint32_t v1)
- : v0(v0), v1(v1) {}
-
- /*! create an 64 bit identifier that is unique for the not oriented edge */
- __forceinline operator uint64_t() const
- {
- uint32_t p0 = v0, p1 = v1;
- if (p0<p1) std::swap(p0,p1);
- return (((uint64_t)p0) << 32) | (uint64_t)p1;
- }
-
- public:
- uint32_t v0,v1; //!< start and end vertex of the edge
- };
-
- HalfEdge ()
- : vtx_index(-1), next_half_edge_ofs(0), prev_half_edge_ofs(0), opposite_half_edge_ofs(0), edge_crease_weight(0),
- vertex_crease_weight(0), edge_level(0), patch_type(COMPLEX_PATCH), vertex_type(REGULAR_VERTEX)
- {
- static_assert(sizeof(HalfEdge) == 32, "invalid half edge size");
- }
-
- __forceinline bool hasOpposite() const { return opposite_half_edge_ofs != 0; }
- __forceinline void setOpposite(HalfEdge* opposite) { opposite_half_edge_ofs = int(opposite-this); }
-
- __forceinline HalfEdge* next() { assert( next_half_edge_ofs != 0 ); return &this[next_half_edge_ofs]; }
- __forceinline const HalfEdge* next() const { assert( next_half_edge_ofs != 0 ); return &this[next_half_edge_ofs]; }
-
- __forceinline HalfEdge* prev() { assert( prev_half_edge_ofs != 0 ); return &this[prev_half_edge_ofs]; }
- __forceinline const HalfEdge* prev() const { assert( prev_half_edge_ofs != 0 ); return &this[prev_half_edge_ofs]; }
-
- __forceinline HalfEdge* opposite() { assert( opposite_half_edge_ofs != 0 ); return &this[opposite_half_edge_ofs]; }
- __forceinline const HalfEdge* opposite() const { assert( opposite_half_edge_ofs != 0 ); return &this[opposite_half_edge_ofs]; }
-
- __forceinline HalfEdge* rotate() { return opposite()->next(); }
- __forceinline const HalfEdge* rotate() const { return opposite()->next(); }
-
- __forceinline unsigned int getStartVertexIndex() const { return vtx_index; }
- __forceinline unsigned int getEndVertexIndex () const { return next()->vtx_index; }
- __forceinline Edge getEdge () const { return Edge(getStartVertexIndex(),getEndVertexIndex()); }
-
-
- /*! tests if the start vertex of the edge is regular */
- __forceinline PatchType vertexType() const
- {
- const HalfEdge* p = this;
- size_t face_valence = 0;
- bool hasBorder = false;
-
- do
- {
- /* we need subdivision to handle edge creases */
- if (p->hasOpposite() && p->edge_crease_weight > 0.0f)
- return COMPLEX_PATCH;
-
- face_valence++;
-
- /* test for quad */
- const HalfEdge* pp = p;
- pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
- pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
- pp = pp->next(); if (pp == p) return COMPLEX_PATCH;
- pp = pp->next(); if (pp != p) return COMPLEX_PATCH;
-
- /* continue with next face */
- p = p->prev();
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else
- {
- face_valence++;
- hasBorder = true;
- p = this;
- while (p->hasOpposite())
- p = p->rotate();
- }
- } while (p != this);
-
- /* calculate vertex type */
- if (face_valence == 2 && hasBorder) {
- if (vertex_crease_weight == 0.0f ) return REGULAR_QUAD_PATCH;
- else if (vertex_crease_weight == float(inf)) return REGULAR_QUAD_PATCH;
- else return COMPLEX_PATCH;
- }
- else if (vertex_crease_weight != 0.0f) return COMPLEX_PATCH;
- else if (face_valence == 3 && hasBorder) return REGULAR_QUAD_PATCH;
- else if (face_valence == 4 && !hasBorder) return REGULAR_QUAD_PATCH;
- else return IRREGULAR_QUAD_PATCH;
- }
-
- /*! tests if this edge is part of a bilinear patch */
- __forceinline bool bilinearVertex() const {
- return vertex_crease_weight == float(inf) && edge_crease_weight == float(inf);
- }
-
- /*! calculates the type of the patch */
- __forceinline PatchType patchType() const
- {
- const HalfEdge* p = this;
- PatchType ret = REGULAR_QUAD_PATCH;
- bool bilinear = true;
-
- ret = max(ret,p->vertexType());
- bilinear &= p->bilinearVertex();
- if ((p = p->next()) == this) return COMPLEX_PATCH;
-
- ret = max(ret,p->vertexType());
- bilinear &= p->bilinearVertex();
- if ((p = p->next()) == this) return COMPLEX_PATCH;
-
- ret = max(ret,p->vertexType());
- bilinear &= p->bilinearVertex();
- if ((p = p->next()) == this) return COMPLEX_PATCH;
-
- ret = max(ret,p->vertexType());
- bilinear &= p->bilinearVertex();
- if ((p = p->next()) != this) return COMPLEX_PATCH;
-
- if (bilinear) return BILINEAR_PATCH;
- return ret;
- }
-
- /*! tests if the face is a regular b-spline face */
- __forceinline bool isRegularFace() const {
- return patch_type == REGULAR_QUAD_PATCH;
- }
-
- /*! tests if the face can be diced (using bspline or gregory patch) */
- __forceinline bool isGregoryFace() const {
- return patch_type == IRREGULAR_QUAD_PATCH || patch_type == REGULAR_QUAD_PATCH;
- }
-
- /*! tests if the base vertex of this half edge is a corner vertex */
- __forceinline bool isCorner() const {
- return !hasOpposite() && !prev()->hasOpposite();
- }
-
- /*! tests if the vertex is attached to any border */
- __forceinline bool vertexHasBorder() const
- {
- const HalfEdge* p = this;
- do {
- if (!p->hasOpposite()) return true;
- p = p->rotate();
- } while (p != this);
- return false;
- }
-
- /*! tests if the face this half edge belongs to has some border */
- __forceinline bool faceHasBorder() const
- {
- const HalfEdge* p = this;
- do {
- if (p->vertexHasBorder()) return true;
- p = p->next();
- } while (p != this);
- return false;
- }
-
- /*! calculates conservative bounds of a catmull clark subdivision face */
- __forceinline BBox3fa bounds(const BufferView<Vec3fa>& vertices) const
- {
- BBox3fa bounds = this->get1RingBounds(vertices);
- for (const HalfEdge* p=this->next(); p!=this; p=p->next())
- bounds.extend(p->get1RingBounds(vertices));
- return bounds;
- }
-
- /*! tests if this is a valid patch */
- __forceinline bool valid(const BufferView<Vec3fa>& vertices) const
- {
- size_t N = 1;
- if (!this->validRing(vertices)) return false;
- for (const HalfEdge* p=this->next(); p!=this; p=p->next(), N++) {
- if (!p->validRing(vertices)) return false;
- }
- return N >= 3 && N <= MAX_PATCH_VALENCE;
- }
-
- /*! counts number of polygon edges */
- __forceinline unsigned int numEdges() const
- {
- unsigned int N = 1;
- for (const HalfEdge* p=this->next(); p!=this; p=p->next(), N++);
- return N;
- }
-
- /*! calculates face and edge valence */
- __forceinline void calculateFaceValenceAndEdgeValence(size_t& faceValence, size_t& edgeValence) const
- {
- faceValence = 0;
- edgeValence = 0;
-
- const HalfEdge* p = this;
- do
- {
- /* calculate bounds of current face */
- unsigned int numEdges = p->numEdges();
- assert(numEdges >= 3);
- edgeValence += numEdges-2;
-
- faceValence++;
- p = p->prev();
-
- /* continue with next face */
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else {
- faceValence++;
- edgeValence++;
- p = this;
- while (p->hasOpposite())
- p = p->opposite()->next();
- }
-
- } while (p != this);
- }
-
- /*! stream output */
- friend __forceinline std::ostream &operator<<(std::ostream &o, const HalfEdge &h)
- {
- return o << "{ " <<
- "vertex = " << h.vtx_index << ", " << //" -> " << h.next()->vtx_index << ", " <<
- "prev = " << h.prev_half_edge_ofs << ", " <<
- "next = " << h.next_half_edge_ofs << ", " <<
- "opposite = " << h.opposite_half_edge_ofs << ", " <<
- "edge_crease = " << h.edge_crease_weight << ", " <<
- "vertex_crease = " << h.vertex_crease_weight << ", " <<
- //"edge_level = " << h.edge_level <<
- " }";
- }
-
- private:
-
- /*! calculates the bounds of the face associated with the half-edge */
- __forceinline BBox3fa getFaceBounds(const BufferView<Vec3fa>& vertices) const
- {
- BBox3fa b = vertices[getStartVertexIndex()];
- for (const HalfEdge* p = next(); p!=this; p=p->next()) {
- b.extend(vertices[p->getStartVertexIndex()]);
- }
- return b;
- }
-
- /*! calculates the bounds of the 1-ring associated with the vertex of the half-edge */
- __forceinline BBox3fa get1RingBounds(const BufferView<Vec3fa>& vertices) const
- {
- BBox3fa bounds = empty;
- const HalfEdge* p = this;
- do
- {
- /* calculate bounds of current face */
- bounds.extend(p->getFaceBounds(vertices));
- p = p->prev();
-
- /* continue with next face */
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else {
- p = this;
- while (p->hasOpposite())
- p = p->opposite()->next();
- }
-
- } while (p != this);
-
- return bounds;
- }
-
- /*! tests if this is a valid face */
- __forceinline bool validFace(const BufferView<Vec3fa>& vertices, size_t& N) const
- {
- const Vec3fa v = vertices[getStartVertexIndex()];
- if (!isvalid(v)) return false;
- size_t n = 1;
- for (const HalfEdge* p = next(); p!=this; p=p->next(), n++) {
- const Vec3fa v = vertices[p->getStartVertexIndex()];
- if (!isvalid(v)) return false;
- }
- N += n-2;
- return n >= 3 && n <= MAX_PATCH_VALENCE;
- }
-
- /*! tests if this is a valid ring */
- __forceinline bool validRing(const BufferView<Vec3fa>& vertices) const
- {
- size_t faceValence = 0;
- size_t edgeValence = 0;
-
- const HalfEdge* p = this;
- do
- {
- /* calculate bounds of current face */
- if (!p->validFace(vertices,edgeValence))
- return false;
-
- faceValence++;
- p = p->prev();
-
- /* continue with next face */
- if (likely(p->hasOpposite()))
- p = p->opposite();
-
- /* if there is no opposite go the long way to the other side of the border */
- else {
- faceValence++;
- edgeValence++;
- p = this;
- while (p->hasOpposite())
- p = p->opposite()->next();
- }
-
- } while (p != this);
-
- return faceValence <= MAX_RING_FACE_VALENCE && edgeValence <= MAX_RING_EDGE_VALENCE;
- }
-
- private:
- unsigned int vtx_index; //!< index of edge start vertex
- int next_half_edge_ofs; //!< relative offset to next half edge of face
- int prev_half_edge_ofs; //!< relative offset to previous half edge of face
- int opposite_half_edge_ofs; //!< relative offset to opposite half edge
-
- public:
- float edge_crease_weight; //!< crease weight attached to edge
- float vertex_crease_weight; //!< crease weight attached to start vertex
- float edge_level; //!< subdivision factor for edge
- PatchType patch_type; //!< stores type of subdiv patch
- VertexType vertex_type; //!< stores type of the start vertex
- char align[2];
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h b/thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h
deleted file mode 100644
index 9fab79cf0c..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "bezier_curve.h"
-
-namespace embree
-{
- template<typename Vertex>
- struct HermiteCurveT : BezierCurveT<Vertex>
- {
- __forceinline HermiteCurveT() {}
-
- __forceinline HermiteCurveT(const BezierCurveT<Vertex>& curve)
- : BezierCurveT<Vertex>(curve) {}
-
- __forceinline HermiteCurveT(const Vertex& v0, const Vertex& t0, const Vertex& v1, const Vertex& t1)
- : BezierCurveT<Vertex>(v0,madd(1.0f/3.0f,t0,v0),nmadd(1.0f/3.0f,t1,v1),v1) {}
-
- __forceinline HermiteCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
- {
- const Vec3ff q0(xfmVector(space,this->v0-p), this->v0.w);
- const Vec3ff q1(xfmVector(space,this->v1-p), this->v1.w);
- const Vec3ff q2(xfmVector(space,this->v2-p), this->v2.w);
- const Vec3ff q3(xfmVector(space,this->v3-p), this->v3.w);
- return BezierCurveT<Vec3ff>(q0,q1,q2,q3);
- }
- };
-
- __forceinline HermiteCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const HermiteCurveT<Vec3ff>& curve) {
- return HermiteCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,BezierCurveT<Vec3ff>(curve)));
- }
-
- typedef HermiteCurveT<Vec3fa> HermiteCurve3fa;
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h
deleted file mode 100644
index f4a854af7f..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h
+++ /dev/null
@@ -1,403 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "bezier_curve.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename V>
- struct TensorLinearQuadraticBezierSurface
- {
- QuadraticBezierCurve<V> L;
- QuadraticBezierCurve<V> R;
-
- __forceinline TensorLinearQuadraticBezierSurface() {}
-
- __forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<V>& curve)
- : L(curve.L), R(curve.R) {}
-
- __forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) {
- L = other.L; R = other.R; return *this;
- }
-
- __forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<V>& L, const QuadraticBezierCurve<V>& R)
- : L(L), R(R) {}
-
- __forceinline BBox<V> bounds() const {
- return merge(L.bounds(),R.bounds());
- }
- };
-
- template<>
- struct TensorLinearQuadraticBezierSurface<Vec2fa>
- {
- QuadraticBezierCurve<vfloat4> LR;
-
- __forceinline TensorLinearQuadraticBezierSurface() {}
-
- __forceinline TensorLinearQuadraticBezierSurface(const TensorLinearQuadraticBezierSurface<Vec2fa>& curve)
- : LR(curve.LR) {}
-
- __forceinline TensorLinearQuadraticBezierSurface& operator= (const TensorLinearQuadraticBezierSurface& other) {
- LR = other.LR; return *this;
- }
-
- __forceinline TensorLinearQuadraticBezierSurface(const QuadraticBezierCurve<vfloat4>& LR)
- : LR(LR) {}
-
- __forceinline BBox<Vec2fa> bounds() const
- {
- const BBox<vfloat4> b = LR.bounds();
- const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper));
- const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(b.lower)),Vec2fa(shuffle<2,3,2,3>(b.upper)));
- return merge(bl,br);
- }
- };
-
- template<typename V>
- struct TensorLinearCubicBezierSurface
- {
- CubicBezierCurve<V> L;
- CubicBezierCurve<V> R;
-
- __forceinline TensorLinearCubicBezierSurface() {}
-
- __forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve)
- : L(curve.L), R(curve.R) {}
-
- __forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) {
- L = other.L; R = other.R; return *this;
- }
-
- __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<V>& L, const CubicBezierCurve<V>& R)
- : L(L), R(R) {}
-
- template<template<typename T> class SourceCurve>
- __forceinline static TensorLinearCubicBezierSurface fromCenterAndNormalCurve(const SourceCurve<Vec3ff>& center, const SourceCurve<Vec3fa>& normal)
- {
- SourceCurve<Vec3ff> vcurve = center;
- SourceCurve<Vec3fa> ncurve = normal;
-
- /* here we construct a patch which follows the curve l(t) =
- * p(t) +/- r(t)*normalize(cross(n(t),dp(t))) */
-
- const Vec3ff p0 = vcurve.eval(0.0f);
- const Vec3ff dp0 = vcurve.eval_du(0.0f);
- const Vec3ff ddp0 = vcurve.eval_dudu(0.0f);
-
- const Vec3fa n0 = ncurve.eval(0.0f);
- const Vec3fa dn0 = ncurve.eval_du(0.0f);
-
- const Vec3ff p1 = vcurve.eval(1.0f);
- const Vec3ff dp1 = vcurve.eval_du(1.0f);
- const Vec3ff ddp1 = vcurve.eval_dudu(1.0f);
-
- const Vec3fa n1 = ncurve.eval(1.0f);
- const Vec3fa dn1 = ncurve.eval_du(1.0f);
-
- const Vec3fa bt0 = cross(n0,dp0);
- const Vec3fa dbt0 = cross(dn0,dp0) + cross(n0,ddp0);
-
- const Vec3fa bt1 = cross(n1,dp1);
- const Vec3fa dbt1 = cross(dn1,dp1) + cross(n1,ddp1);
-
- const Vec3fa k0 = normalize(bt0);
- const Vec3fa dk0 = dnormalize(bt0,dbt0);
-
- const Vec3fa k1 = normalize(bt1);
- const Vec3fa dk1 = dnormalize(bt1,dbt1);
-
- const Vec3fa l0 = p0 - p0.w*k0;
- const Vec3fa dl0 = dp0 - (dp0.w*k0 + p0.w*dk0);
-
- const Vec3fa r0 = p0 + p0.w*k0;
- const Vec3fa dr0 = dp0 + (dp0.w*k0 + p0.w*dk0);
-
- const Vec3fa l1 = p1 - p1.w*k1;
- const Vec3fa dl1 = dp1 - (dp1.w*k1 + p1.w*dk1);
-
- const Vec3fa r1 = p1 + p1.w*k1;
- const Vec3fa dr1 = dp1 + (dp1.w*k1 + p1.w*dk1);
-
- const float scale = 1.0f/3.0f;
- CubicBezierCurve<V> L(l0,l0+scale*dl0,l1-scale*dl1,l1);
- CubicBezierCurve<V> R(r0,r0+scale*dr0,r1-scale*dr1,r1);
- return TensorLinearCubicBezierSurface(L,R);
- }
-
- __forceinline BBox<V> bounds() const {
- return merge(L.bounds(),R.bounds());
- }
-
- __forceinline BBox3fa accurateBounds() const {
- return merge(L.accurateBounds(),R.accurateBounds());
- }
-
- __forceinline CubicBezierCurve<Interval1f> reduce_v() const {
- return merge(CubicBezierCurve<Interval<V>>(L),CubicBezierCurve<Interval<V>>(R));
- }
-
- __forceinline LinearBezierCurve<Interval1f> reduce_u() const {
- return LinearBezierCurve<Interval1f>(L.bounds(),R.bounds());
- }
-
- __forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx) const {
- return TensorLinearCubicBezierSurface<float>(L.xfm(dx),R.xfm(dx));
- }
-
- __forceinline TensorLinearCubicBezierSurface<vfloatx> vxfm(const V& dx) const {
- return TensorLinearCubicBezierSurface<vfloatx>(L.vxfm(dx),R.vxfm(dx));
- }
-
- __forceinline TensorLinearCubicBezierSurface<float> xfm(const V& dx, const V& p) const {
- return TensorLinearCubicBezierSurface<float>(L.xfm(dx,p),R.xfm(dx,p));
- }
-
- __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space) const {
- return TensorLinearCubicBezierSurface(L.xfm(space),R.xfm(space));
- }
-
- __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const {
- return TensorLinearCubicBezierSurface(L.xfm(space,p),R.xfm(space,p));
- }
-
- __forceinline TensorLinearCubicBezierSurface<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const {
- return TensorLinearCubicBezierSurface(L.xfm(space,p,s),R.xfm(space,p,s));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const {
- return TensorLinearCubicBezierSurface(L.clip(u),R.clip(u));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const {
- return TensorLinearCubicBezierSurface(clerp(L,R,V(v.lower)),clerp(L,R,V(v.upper)));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const {
- return clip_v(v).clip_u(u);
- }
-
- __forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const
- {
- CubicBezierCurve<V> L0,L1; L.split(L0,L1,u);
- CubicBezierCurve<V> R0,R1; R.split(R0,R1,u);
- new (&left ) TensorLinearCubicBezierSurface(L0,R0);
- new (&right) TensorLinearCubicBezierSurface(L1,R1);
- }
-
- __forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const {
- valid = true; clear(valid,VSIZEX-1);
- return TensorLinearCubicBezierSurface<Vec2vfx>(L.split(u),R.split(u));
- }
-
- __forceinline V eval(const float u, const float v) const {
- return clerp(L,R,V(v)).eval(u);
- }
-
- __forceinline V eval_du(const float u, const float v) const {
- return clerp(L,R,V(v)).eval_dt(u);
- }
-
- __forceinline V eval_dv(const float u, const float v) const {
- return (R-L).eval(u);
- }
-
- __forceinline void eval(const float u, const float v, V& p, V& dpdu, V& dpdv) const
- {
- V p0, dp0du; L.eval(u,p0,dp0du);
- V p1, dp1du; R.eval(u,p1,dp1du);
- p = lerp(p0,p1,v);
- dpdu = lerp(dp0du,dp1du,v);
- dpdv = p1-p0;
- }
-
- __forceinline TensorLinearQuadraticBezierSurface<V> derivative_u() const {
- return TensorLinearQuadraticBezierSurface<V>(L.derivative(),R.derivative());
- }
-
- __forceinline CubicBezierCurve<V> derivative_v() const {
- return R-L;
- }
-
- __forceinline V axis_u() const {
- return (L.end()-L.begin())+(R.end()-R.begin());
- }
-
- __forceinline V axis_v() const {
- return (R.begin()-L.begin())+(R.end()-L.end());
- }
-
- friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a)
- {
- return cout << "TensorLinearCubicBezierSurface" << embree_endl
- << "{" << embree_endl
- << " L = " << a.L << ", " << embree_endl
- << " R = " << a.R << embree_endl
- << "}";
- }
-
- friend __forceinline TensorLinearCubicBezierSurface clerp(const TensorLinearCubicBezierSurface& a, const TensorLinearCubicBezierSurface& b, const float t) {
- return TensorLinearCubicBezierSurface(clerp(a.L,b.L,V(t)), clerp(a.R,b.R,V(t)));
- }
- };
-
- template<>
- struct TensorLinearCubicBezierSurface<Vec2fa>
- {
- CubicBezierCurve<vfloat4> LR;
-
- __forceinline TensorLinearCubicBezierSurface() {}
-
- __forceinline TensorLinearCubicBezierSurface(const TensorLinearCubicBezierSurface& curve)
- : LR(curve.LR) {}
-
- __forceinline TensorLinearCubicBezierSurface& operator= (const TensorLinearCubicBezierSurface& other) {
- LR = other.LR; return *this;
- }
-
- __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<vfloat4>& LR)
- : LR(LR) {}
-
- __forceinline TensorLinearCubicBezierSurface(const CubicBezierCurve<Vec2fa>& L, const CubicBezierCurve<Vec2fa>& R)
- : LR(shuffle<0,1,0,1>(vfloat4(L.v0),vfloat4(R.v0)),shuffle<0,1,0,1>(vfloat4(L.v1),vfloat4(R.v1)),shuffle<0,1,0,1>(vfloat4(L.v2),vfloat4(R.v2)),shuffle<0,1,0,1>(vfloat4(L.v3),vfloat4(R.v3))) {}
-
- __forceinline CubicBezierCurve<Vec2fa> getL() const {
- return CubicBezierCurve<Vec2fa>(Vec2fa(LR.v0),Vec2fa(LR.v1),Vec2fa(LR.v2),Vec2fa(LR.v3));
- }
-
- __forceinline CubicBezierCurve<Vec2fa> getR() const {
- return CubicBezierCurve<Vec2fa>(Vec2fa(shuffle<2,3,2,3>(LR.v0)),Vec2fa(shuffle<2,3,2,3>(LR.v1)),Vec2fa(shuffle<2,3,2,3>(LR.v2)),Vec2fa(shuffle<2,3,2,3>(LR.v3)));
- }
-
- __forceinline BBox<Vec2fa> bounds() const
- {
- const BBox<vfloat4> b = LR.bounds();
- const BBox<Vec2fa> bl(Vec2fa(b.lower),Vec2fa(b.upper));
- const BBox<Vec2fa> br(Vec2fa(shuffle<2,3,2,3>(b.lower)),Vec2fa(shuffle<2,3,2,3>(b.upper)));
- return merge(bl,br);
- }
-
- __forceinline BBox1f bounds(const Vec2fa& axis) const
- {
- const CubicBezierCurve<vfloat4> LRx = LR;
- const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
- const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(axis)),LRx,shuffle<1>(vfloat4(axis))*LRy);
- const BBox<vfloat4> Lb = LRa.bounds();
- const BBox<vfloat4> Rb(shuffle<3>(Lb.lower),shuffle<3>(Lb.upper));
- const BBox<vfloat4> b = merge(Lb,Rb);
- return BBox1f(b.lower[0],b.upper[0]);
- }
-
- __forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx) const
- {
- const CubicBezierCurve<vfloat4> LRx = LR;
- const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
- const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(dx)),LRx,shuffle<1>(vfloat4(dx))*LRy);
- return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]),
- CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2]));
- }
-
- __forceinline TensorLinearCubicBezierSurface<float> xfm(const Vec2fa& dx, const Vec2fa& p) const
- {
- const vfloat4 pxyxy = shuffle<0,1,0,1>(vfloat4(p));
- const CubicBezierCurve<vfloat4> LRx = LR-pxyxy;
- const CubicBezierCurve<vfloat4> LRy(shuffle<1,0,3,2>(LR.v0),shuffle<1,0,3,2>(LR.v1),shuffle<1,0,3,2>(LR.v2),shuffle<1,0,3,2>(LR.v3));
- const CubicBezierCurve<vfloat4> LRa = cmadd(shuffle<0>(vfloat4(dx)),LRx,shuffle<1>(vfloat4(dx))*LRy);
- return TensorLinearCubicBezierSurface<float>(CubicBezierCurve<float>(LRa.v0[0],LRa.v1[0],LRa.v2[0],LRa.v3[0]),
- CubicBezierCurve<float>(LRa.v0[2],LRa.v1[2],LRa.v2[2],LRa.v3[2]));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip_u(const Interval1f& u) const {
- return TensorLinearCubicBezierSurface(LR.clip(u));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip_v(const Interval1f& v) const
- {
- const CubicBezierCurve<vfloat4> LL(shuffle<0,1,0,1>(LR.v0),shuffle<0,1,0,1>(LR.v1),shuffle<0,1,0,1>(LR.v2),shuffle<0,1,0,1>(LR.v3));
- const CubicBezierCurve<vfloat4> RR(shuffle<2,3,2,3>(LR.v0),shuffle<2,3,2,3>(LR.v1),shuffle<2,3,2,3>(LR.v2),shuffle<2,3,2,3>(LR.v3));
- return TensorLinearCubicBezierSurface(clerp(LL,RR,vfloat4(v.lower,v.lower,v.upper,v.upper)));
- }
-
- __forceinline TensorLinearCubicBezierSurface clip(const Interval1f& u, const Interval1f& v) const {
- return clip_v(v).clip_u(u);
- }
-
- __forceinline void split_u(TensorLinearCubicBezierSurface& left, TensorLinearCubicBezierSurface& right, const float u = 0.5f) const
- {
- CubicBezierCurve<vfloat4> LR0,LR1; LR.split(LR0,LR1,u);
- new (&left ) TensorLinearCubicBezierSurface(LR0);
- new (&right) TensorLinearCubicBezierSurface(LR1);
- }
-
- __forceinline TensorLinearCubicBezierSurface<Vec2vfx> vsplit_u(vboolx& valid, const BBox1f& u) const {
- valid = true; clear(valid,VSIZEX-1);
- return TensorLinearCubicBezierSurface<Vec2vfx>(getL().split(u),getR().split(u));
- }
-
- __forceinline Vec2fa eval(const float u, const float v) const
- {
- const vfloat4 p = LR.eval(u);
- return Vec2fa(lerp(shuffle<0,1,0,1>(p),shuffle<2,3,2,3>(p),v));
- }
-
- __forceinline Vec2fa eval_du(const float u, const float v) const
- {
- const vfloat4 dpdu = LR.eval_dt(u);
- return Vec2fa(lerp(shuffle<0,1,0,1>(dpdu),shuffle<2,3,2,3>(dpdu),v));
- }
-
- __forceinline Vec2fa eval_dv(const float u, const float v) const
- {
- const vfloat4 p = LR.eval(u);
- return Vec2fa(shuffle<2,3,2,3>(p)-shuffle<0,1,0,1>(p));
- }
-
- __forceinline void eval(const float u, const float v, Vec2fa& p, Vec2fa& dpdu, Vec2fa& dpdv) const
- {
- vfloat4 p0, dp0du; LR.eval(u,p0,dp0du);
- p = Vec2fa(lerp(shuffle<0,1,0,1>(p0),shuffle<2,3,2,3>(p0),v));
- dpdu = Vec2fa(lerp(shuffle<0,1,0,1>(dp0du),shuffle<2,3,2,3>(dp0du),v));
- dpdv = Vec2fa(shuffle<2,3,2,3>(p0)-shuffle<0,1,0,1>(p0));
- }
-
- __forceinline TensorLinearQuadraticBezierSurface<Vec2fa> derivative_u() const {
- return TensorLinearQuadraticBezierSurface<Vec2fa>(LR.derivative());
- }
-
- __forceinline CubicBezierCurve<Vec2fa> derivative_v() const {
- return getR()-getL();
- }
-
- __forceinline Vec2fa axis_u() const
- {
- const CubicBezierCurve<Vec2fa> L = getL();
- const CubicBezierCurve<Vec2fa> R = getR();
- return (L.end()-L.begin())+(R.end()-R.begin());
- }
-
- __forceinline Vec2fa axis_v() const
- {
- const CubicBezierCurve<Vec2fa> L = getL();
- const CubicBezierCurve<Vec2fa> R = getR();
- return (R.begin()-L.begin())+(R.end()-L.end());
- }
-
- friend embree_ostream operator<<(embree_ostream cout, const TensorLinearCubicBezierSurface& a)
- {
- return cout << "TensorLinearCubicBezierSurface" << embree_endl
- << "{" << embree_endl
- << " L = " << a.getL() << ", " << embree_endl
- << " R = " << a.getR() << embree_endl
- << "}";
- }
- };
-
- typedef TensorLinearCubicBezierSurface<float> TensorLinearCubicBezierSurface1f;
- typedef TensorLinearCubicBezierSurface<Vec2fa> TensorLinearCubicBezierSurface2fa;
- typedef TensorLinearCubicBezierSurface<Vec3fa> TensorLinearCubicBezierSurface3fa;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch.h b/thirdparty/embree-aarch64/kernels/subdiv/patch.h
deleted file mode 100644
index d58241b96d..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/patch.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "catmullclark_patch.h"
-#include "bilinear_patch.h"
-#include "bspline_patch.h"
-#include "bezier_patch.h"
-#include "gregory_patch.h"
-#include "tessellation_cache.h"
-
-#if 1
-#define PATCH_DEBUG_SUBDIVISION(ptr,x,y,z)
-#else
-#define PATCH_DEBUG_SUBDIVISION(ptr,x,y,z) \
- { \
- size_t hex = (size_t)ptr; \
- for (size_t i=0; i<4; i++) hex = hex ^ (hex >> 8); \
- const float c = (float)(((hex >> 0) ^ (hex >> 4) ^ (hex >> 8) ^ (hex >> 12) ^ (hex >> 16))&0xf)/15.0f; \
- if (P) *P = Vertex(0.5f+0.5f*x,0.5f+0.5f*y,0.5f+0.5f*z,0.0f); \
- }
-#endif
-
-#define PATCH_MAX_CACHE_DEPTH 2
-//#define PATCH_MIN_RESOLUTION 1 // FIXME: not yet completely implemented
-#define PATCH_MAX_EVAL_DEPTH_IRREGULAR 10 // maximum evaluation depth at irregular vertices (has to be larger or equal than PATCH_MAX_CACHE_DEPTH)
-#define PATCH_MAX_EVAL_DEPTH_CREASE 10 // maximum evaluation depth at crease features (has to be larger or equal than PATCH_MAX_CACHE_DEPTH)
-#define PATCH_USE_GREGORY 1 // 0 = no gregory, 1 = fill, 2 = as early as possible
-
-#if PATCH_USE_GREGORY==2
-#define PATCH_USE_BEZIER_PATCH 1 // enable use of bezier instead of b-spline patches
-#else
-#define PATCH_USE_BEZIER_PATCH 0 // enable use of bezier instead of b-spline patches
-#endif
-
-#if PATCH_USE_BEZIER_PATCH
-# define RegularPatch BezierPatch
-# define RegularPatchT BezierPatchT<Vertex,Vertex_t>
-#else
-# define RegularPatch BSplinePatch
-# define RegularPatchT BSplinePatchT<Vertex,Vertex_t>
-#endif
-
-#if PATCH_USE_GREGORY
-#define IrregularFillPatch GregoryPatch
-#define IrregularFillPatchT GregoryPatchT<Vertex,Vertex_t>
-#else
-#define IrregularFillPatch BilinearPatch
-#define IrregularFillPatchT BilinearPatchT<Vertex,Vertex_t>
-#endif
-
-namespace embree
-{
- template<typename Vertex, typename Vertex_t = Vertex>
- struct __aligned(64) PatchT
- {
- public:
-
- typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
- typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
- typedef BezierCurveT<Vertex> BezierCurve;
-
- enum Type {
- INVALID_PATCH = 0,
- BILINEAR_PATCH = 1,
- BSPLINE_PATCH = 2,
- BEZIER_PATCH = 3,
- GREGORY_PATCH = 4,
- SUBDIVIDED_GENERAL_PATCH = 7,
- SUBDIVIDED_QUAD_PATCH = 8,
- EVAL_PATCH = 9,
- };
-
- struct Ref
- {
- __forceinline Ref(void* p = nullptr)
- : ptr((size_t)p) {}
-
- __forceinline operator bool() const { return ptr != 0; }
- __forceinline operator size_t() const { return ptr; }
-
- __forceinline Ref (Type ty, void* in)
- : ptr(((size_t)in)+ty) { assert((((size_t)in) & 0xF) == 0); }
-
- __forceinline Type type () const { return (Type)(ptr & 0xF); }
- __forceinline void* object() const { return (void*) (ptr & ~0xF); }
-
- size_t ptr;
- };
-
- struct EvalPatch
- {
- /* creates EvalPatch from a CatmullClarkPatch */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch)
- {
- size_t ofs = 0, bytes = patch.bytes();
- void* ptr = alloc(bytes);
- patch.serialize(ptr,ofs);
- assert(ofs == bytes);
- return Ref(EVAL_PATCH, ptr);
- }
- };
-
- struct BilinearPatch
- {
- /* creates BilinearPatch from a CatmullClarkPatch */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
- const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
- return Ref(BILINEAR_PATCH, new (alloc(sizeof(BilinearPatch))) BilinearPatch(patch));
- }
-
- __forceinline BilinearPatch (const CatmullClarkPatch& patch)
- : patch(patch) {}
-
- /* creates BilinearPatch from 4 vertices */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
- return Ref(BILINEAR_PATCH, new (alloc(sizeof(BilinearPatch))) BilinearPatch(edge,vertices,stride));
- }
-
- __forceinline BilinearPatch (const HalfEdge* edge, const char* vertices, size_t stride)
- : patch(edge,vertices,stride) {}
-
- public:
- BilinearPatchT<Vertex,Vertex_t> patch;
- };
-
- struct BSplinePatch
- {
- /* creates BSplinePatch from a half edge */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
- return Ref(BSPLINE_PATCH, new (alloc(sizeof(BSplinePatch))) BSplinePatch(edge,vertices,stride));
- }
-
- __forceinline BSplinePatch (const HalfEdge* edge, const char* vertices, size_t stride)
- : patch(edge,vertices,stride) {}
-
- /* creates BSplinePatch from a CatmullClarkPatch */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
- const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
- return Ref(BSPLINE_PATCH, new (alloc(sizeof(BSplinePatch))) BSplinePatch(patch,border0,border1,border2,border3));
- }
-
- __forceinline BSplinePatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
- : patch(patch,border0,border1,border2,border3) {}
-
- public:
- BSplinePatchT<Vertex,Vertex_t> patch;
- };
-
- struct BezierPatch
- {
- /* creates BezierPatch from a half edge */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
- return Ref(BEZIER_PATCH, new (alloc(sizeof(BezierPatch))) BezierPatch(edge,vertices,stride));
- }
-
- __forceinline BezierPatch (const HalfEdge* edge, const char* vertices, size_t stride)
- : patch(edge,vertices,stride) {}
-
- /* creates Bezier from a CatmullClarkPatch */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
- const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
- return Ref(BEZIER_PATCH, new (alloc(sizeof(BezierPatch))) BezierPatch(patch,border0,border1,border2,border3));
- }
-
- __forceinline BezierPatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
- : patch(patch,border0,border1,border2,border3) {}
-
- public:
- BezierPatchT<Vertex,Vertex_t> patch;
- };
-
- struct GregoryPatch
- {
- /* creates GregoryPatch from half edge */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride) {
- return Ref(GREGORY_PATCH, new (alloc(sizeof(GregoryPatch))) GregoryPatch(edge,vertices,stride));
- }
-
- __forceinline GregoryPatch (const HalfEdge* edge, const char* vertices, size_t stride)
- : patch(CatmullClarkPatch(edge,vertices,stride)) {}
-
- /* creates GregoryPatch from CatmullClarkPatch */
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const CatmullClarkPatch& patch,
- const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) {
- return Ref(GREGORY_PATCH, new (alloc(sizeof(GregoryPatch))) GregoryPatch(patch,border0,border1,border2,border3));
- }
-
- __forceinline GregoryPatch (const CatmullClarkPatch& patch, const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
- : patch(patch,border0,border1,border2,border3) {}
-
- public:
- GregoryPatchT<Vertex,Vertex_t> patch;
- };
-
- struct SubdividedQuadPatch
- {
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, Ref children[4]) {
- return Ref(SUBDIVIDED_QUAD_PATCH, new (alloc(sizeof(SubdividedQuadPatch))) SubdividedQuadPatch(children));
- }
-
- __forceinline SubdividedQuadPatch(Ref children[4]) {
- for (size_t i=0; i<4; i++) child[i] = children[i];
- }
-
- public:
- Ref child[4];
- };
-
- struct SubdividedGeneralPatch
- {
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, Ref* children, const unsigned N) {
- return Ref(SUBDIVIDED_GENERAL_PATCH, new (alloc(sizeof(SubdividedGeneralPatch))) SubdividedGeneralPatch(children,N));
- }
-
- __forceinline SubdividedGeneralPatch(Ref* children, const unsigned N) : N(N) {
- for (unsigned i=0; i<N; i++) child[i] = children[i];
- }
-
- unsigned N;
- Ref child[MAX_PATCH_VALENCE];
- };
-
- /*! Default constructor. */
- __forceinline PatchT () {}
-
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, const HalfEdge* edge, const char* vertices, size_t stride)
- {
- if (PATCH_MAX_CACHE_DEPTH == 0)
- return nullptr;
-
- Ref child(0);
- switch (edge->patch_type) {
- case HalfEdge::BILINEAR_PATCH: child = BilinearPatch::create(alloc,edge,vertices,stride); break;
- case HalfEdge::REGULAR_QUAD_PATCH: child = RegularPatch::create(alloc,edge,vertices,stride); break;
-#if PATCH_USE_GREGORY == 2
- case HalfEdge::IRREGULAR_QUAD_PATCH: child = GregoryPatch::create(alloc,edge,vertices,stride); break;
-#endif
- default: {
- GeneralCatmullClarkPatch patch(edge,vertices,stride);
- child = PatchT::create(alloc,patch,edge,vertices,stride,0);
- }
- }
- return child;
- }
-
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, GeneralCatmullClarkPatch& patch, const HalfEdge* edge, const char* vertices, size_t stride, size_t depth)
- {
- /* convert into standard quad patch if possible */
- if (likely(patch.isQuadPatch()))
- {
- CatmullClarkPatch qpatch; patch.init(qpatch);
- return PatchT::create(alloc,qpatch,edge,vertices,stride,depth);
- }
-
- /* do only cache up to some depth */
- if (depth >= PATCH_MAX_CACHE_DEPTH)
- return nullptr;
-
- /* subdivide patch */
- unsigned N;
- array_t<CatmullClarkPatch,GeneralCatmullClarkPatch::SIZE> patches;
- patch.subdivide(patches,N);
-
- if (N == 4)
- {
- Ref child[4];
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[GeneralCatmullClarkPatch::SIZE]; patch.getLimitBorder(borders);
- BezierCurve border0l,border0r; borders[0].subdivide(border0l,border0r);
- BezierCurve border1l,border1r; borders[1].subdivide(border1l,border1r);
- BezierCurve border2l,border2r; borders[2].subdivide(border2l,border2r);
- BezierCurve border3l,border3r; borders[3].subdivide(border3l,border3r);
- GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
- child[0] = PatchT::create(alloc,patches[0],edge,vertices,stride,depth+1,&border0l,nullptr,nullptr,&border3r);
- child[1] = PatchT::create(alloc,patches[1],edge,vertices,stride,depth+1,&border0r,&border1l,nullptr,nullptr);
- child[2] = PatchT::create(alloc,patches[2],edge,vertices,stride,depth+1,nullptr,&border1r,&border2l,nullptr);
- child[3] = PatchT::create(alloc,patches[3],edge,vertices,stride,depth+1,nullptr,nullptr,&border2r,&border3l);
-#else
- GeneralCatmullClarkPatch::fix_quad_ring_order(patches);
- for (size_t i=0; i<4; i++)
- child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
-#endif
- return SubdividedQuadPatch::create(alloc,child);
- }
- else
- {
- assert(N<MAX_PATCH_VALENCE);
- Ref child[MAX_PATCH_VALENCE];
-
-#if PATCH_USE_GREGORY == 2
- BezierCurve borders[GeneralCatmullClarkPatch::SIZE];
- patch.getLimitBorder(borders);
-
- for (size_t i0=0; i0<N; i0++) {
- const size_t i2 = i0==0 ? N-1 : i0-1;
- BezierCurve border0l,border0r; borders[i0].subdivide(border0l,border0r);
- BezierCurve border2l,border2r; borders[i2].subdivide(border2l,border2r);
- child[i0] = PatchT::create(alloc,patches[i0],edge,vertices,stride,depth+1, &border0l, nullptr, nullptr, &border2r);
- }
-#else
- for (size_t i=0; i<N; i++)
- child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
-#endif
- return SubdividedGeneralPatch::create(alloc,child,N);
- }
-
- return nullptr;
- }
-
- static __forceinline bool final(const CatmullClarkPatch& patch, const typename CatmullClarkRing::Type type, size_t depth)
- {
- const size_t max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR;
-//#if PATCH_MIN_RESOLUTION
-// return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=max_eval_depth;
-//#else
- return depth>=max_eval_depth;
-//#endif
- }
-
- template<typename Allocator>
- __noinline static Ref create(const Allocator& alloc, CatmullClarkPatch& patch, const HalfEdge* edge, const char* vertices, size_t stride, size_t depth,
- const BezierCurve* border0 = nullptr, const BezierCurve* border1 = nullptr, const BezierCurve* border2 = nullptr, const BezierCurve* border3 = nullptr)
- {
- const typename CatmullClarkPatch::Type ty = patch.type();
- if (unlikely(final(patch,ty,depth))) {
- if (ty & CatmullClarkRing::TYPE_REGULAR) return RegularPatch::create(alloc,patch,border0,border1,border2,border3);
- else return IrregularFillPatch::create(alloc,patch,border0,border1,border2,border3);
- }
- else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
- assert(depth > 0); return RegularPatch::create(alloc,patch,border0,border1,border2,border3);
- }
-#if PATCH_USE_GREGORY == 2
- else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
- assert(depth > 0); return GregoryPatch::create(alloc,patch,border0,border1,border2,border3);
- }
-#endif
- else if (depth >= PATCH_MAX_CACHE_DEPTH) {
- return EvalPatch::create(alloc,patch);
- }
-
- else
- {
- Ref child[4];
- array_t<CatmullClarkPatch,4> patches;
- patch.subdivide(patches);
-
- for (size_t i=0; i<4; i++)
- child[i] = PatchT::create(alloc,patches[i],edge,vertices,stride,depth+1);
- return SubdividedQuadPatch::create(alloc,child);
- }
- }
- };
-
- typedef PatchT<Vec3fa,Vec3fa_t> Patch3fa;
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h b/thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h
deleted file mode 100644
index 482d015fa3..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-#include "feature_adaptive_eval.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename Vertex, typename Vertex_t = Vertex>
- struct PatchEval
- {
- public:
-
- typedef PatchT<Vertex,Vertex_t> Patch;
- typedef typename Patch::Ref Ref;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
-
- PatchEval (SharedLazyTessellationCache::CacheEntry& entry, size_t commitCounter,
- const HalfEdge* edge, const char* vertices, size_t stride, const float u, const float v,
- Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv)
- {
- /* conservative time for the very first allocation */
- auto time = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
-
- Ref patch = SharedLazyTessellationCache::lookup(entry,commitCounter,[&] () {
- auto alloc = [&](size_t bytes) { return SharedLazyTessellationCache::malloc(bytes); };
- return Patch::create(alloc,edge,vertices,stride);
- },true);
-
- auto curTime = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
- const bool allAllocationsValid = SharedLazyTessellationCache::validTime(time,curTime);
-
- if (patch && allAllocationsValid && eval(patch,u,v,1.0f,0)) {
- SharedLazyTessellationCache::unlock();
- return;
- }
- SharedLazyTessellationCache::unlock();
- FeatureAdaptiveEval<Vertex,Vertex_t>(edge,vertices,stride,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv);
- PATCH_DEBUG_SUBDIVISION(edge,c,-1,-1);
- }
-
- __forceinline bool eval_quad(const typename Patch::SubdividedQuadPatch* This, const float u, const float v, const float dscale, const size_t depth)
- {
- if (v < 0.5f) {
- if (u < 0.5f) return eval(This->child[0],2.0f*u,2.0f*v,2.0f*dscale,depth+1);
- else return eval(This->child[1],2.0f*u-1.0f,2.0f*v,2.0f*dscale,depth+1);
- } else {
- if (u > 0.5f) return eval(This->child[2],2.0f*u-1.0f,2.0f*v-1.0f,2.0f*dscale,depth+1);
- else return eval(This->child[3],2.0f*u,2.0f*v-1.0f,2.0f*dscale,depth+1);
- }
- }
-
- bool eval_general(const typename Patch::SubdividedGeneralPatch* This, const float U, const float V, const size_t depth)
- {
- const unsigned l = (unsigned) floor(0.5f*U); const float u = 2.0f*frac(0.5f*U)-0.5f;
- const unsigned h = (unsigned) floor(0.5f*V); const float v = 2.0f*frac(0.5f*V)-0.5f;
- const unsigned i = 4*h+l; assert(i<This->N);
- return eval(This->child[i],u,v,1.0f,depth+1);
- }
-
- bool eval(Ref This, const float& u, const float& v, const float dscale, const size_t depth)
- {
- if (!This) return false;
- //PRINT(depth);
- //PRINT2(u,v);
-
- switch (This.type())
- {
- case Patch::BILINEAR_PATCH: {
- //PRINT("bilinear");
- ((typename Patch::BilinearPatch*)This.object())->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(This,-1,c,c);
- return true;
- }
- case Patch::BSPLINE_PATCH: {
- //PRINT("bspline");
- ((typename Patch::BSplinePatch*)This.object())->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(This,-1,c,-1);
- return true;
- }
- case Patch::BEZIER_PATCH: {
- //PRINT("bezier");
- ((typename Patch::BezierPatch*)This.object())->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(This,-1,c,-1);
- return true;
- }
- case Patch::GREGORY_PATCH: {
- //PRINT("gregory");
- ((typename Patch::GregoryPatch*)This.object())->patch.eval(u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale);
- PATCH_DEBUG_SUBDIVISION(This,-1,-1,c);
- return true;
- }
- case Patch::SUBDIVIDED_QUAD_PATCH: {
- //PRINT("subdivided quad");
- return eval_quad(((typename Patch::SubdividedQuadPatch*)This.object()),u,v,dscale,depth);
- }
- case Patch::SUBDIVIDED_GENERAL_PATCH: {
- //PRINT("general_patch");
- assert(dscale == 1.0f);
- return eval_general(((typename Patch::SubdividedGeneralPatch*)This.object()),u,v,depth);
- }
- case Patch::EVAL_PATCH: {
- //PRINT("eval_patch");
- CatmullClarkPatch patch; patch.deserialize(This.object());
- FeatureAdaptiveEval<Vertex,Vertex_t>(patch,u,v,dscale,depth,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv);
- return true;
- }
- default:
- assert(false);
- return false;
- }
- }
-
- private:
- Vertex* const P;
- Vertex* const dPdu;
- Vertex* const dPdv;
- Vertex* const ddPdudu;
- Vertex* const ddPdvdv;
- Vertex* const ddPdudv;
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h b/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h
deleted file mode 100644
index c05db55f4c..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h
+++ /dev/null
@@ -1,245 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-#include "feature_adaptive_eval_grid.h"
-
-namespace embree
-{
- namespace isa
- {
- struct PatchEvalGrid
- {
- typedef Patch3fa Patch;
- typedef Patch::Ref Ref;
- typedef GeneralCatmullClarkPatch3fa GeneralCatmullClarkPatch;
- typedef CatmullClarkPatch3fa CatmullClarkPatch;
- typedef BSplinePatch3fa BSplinePatch;
- typedef BezierPatch3fa BezierPatch;
- typedef GregoryPatch3fa GregoryPatch;
- typedef BilinearPatch3fa BilinearPatch;
-
- private:
- const unsigned x0,x1;
- const unsigned y0,y1;
- const unsigned swidth,sheight;
- const float rcp_swidth, rcp_sheight;
- float* const Px;
- float* const Py;
- float* const Pz;
- float* const U;
- float* const V;
- float* const Nx;
- float* const Ny;
- float* const Nz;
- const unsigned dwidth,dheight;
- unsigned count;
-
- public:
-
- PatchEvalGrid (Ref patch, unsigned subPatch,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- float* Px, float* Py, float* Pz, float* U, float* V,
- float* Nx, float* Ny, float* Nz,
- const unsigned dwidth, const unsigned dheight)
- : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
- Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), dheight(dheight), count(0)
- {
- assert(swidth < (2<<20) && sheight < (2<<20));
- const BBox2f srange(Vec2f(0.0f,0.0f),Vec2f(float(swidth-1),float(sheight-1)));
- const BBox2f erange(Vec2f(float(x0),float(y0)),Vec2f((float)x1,(float)y1));
- bool done MAYBE_UNUSED = eval(patch,subPatch,srange,erange);
- assert(done);
- assert(count == (x1-x0+1)*(y1-y0+1));
- }
-
- template<typename Patch>
- __forceinline void evalLocalGrid(const Patch* patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1)
- {
- const float scale_x = rcp(srange.upper.x-srange.lower.x);
- const float scale_y = rcp(srange.upper.y-srange.lower.y);
- count += (lx1-lx0)*(ly1-ly0);
-
-#if 0
- for (unsigned iy=ly0; iy<ly1; iy++) {
- for (unsigned ix=lx0; ix<lx1; ix++) {
- const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x);
- const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y);
- const Vec3fa p = patch->patch.eval(lu,lv);
- const float u = float(ix)*rcp_swidth;
- const float v = float(iy)*rcp_sheight;
- const int ofs = (iy-y0)*dwidth+(ix-x0);
- Px[ofs] = p.x;
- Py[ofs] = p.y;
- Pz[ofs] = p.z;
- U[ofs] = u;
- V[ofs] = v;
- }
- }
-#else
- foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) {
- const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x);
- const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y);
- const Vec3vfx p = patch->patch.eval(lu,lv);
- Vec3vfx n = zero;
- if (unlikely(Nx != nullptr)) n = normalize_safe(patch->patch.normal(lu,lv));
- const vfloatx u = vfloatx(ix)*rcp_swidth;
- const vfloatx v = vfloatx(iy)*rcp_sheight;
- const vintx ofs = (iy-y0)*dwidth+(ix-x0);
- if (likely(all(valid)) && all(iy==iy[0])) {
- const unsigned ofs2 = ofs[0];
- vfloatx::storeu(Px+ofs2,p.x);
- vfloatx::storeu(Py+ofs2,p.y);
- vfloatx::storeu(Pz+ofs2,p.z);
- vfloatx::storeu(U+ofs2,u);
- vfloatx::storeu(V+ofs2,v);
- if (unlikely(Nx != nullptr)) {
- vfloatx::storeu(Nx+ofs2,n.x);
- vfloatx::storeu(Ny+ofs2,n.y);
- vfloatx::storeu(Nz+ofs2,n.z);
- }
- } else {
- foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) {
- const unsigned ofs2 = ofs[j]-j;
- vfloatx::storeu(valid,Px+ofs2,p.x);
- vfloatx::storeu(valid,Py+ofs2,p.y);
- vfloatx::storeu(valid,Pz+ofs2,p.z);
- vfloatx::storeu(valid,U+ofs2,u);
- vfloatx::storeu(valid,V+ofs2,v);
- if (unlikely(Nx != nullptr)) {
- vfloatx::storeu(valid,Nx+ofs2,n.x);
- vfloatx::storeu(valid,Ny+ofs2,n.y);
- vfloatx::storeu(valid,Nz+ofs2,n.z);
- }
- });
- }
- });
-#endif
- }
-
- bool eval(Ref This, const BBox2f& srange, const BBox2f& erange, const unsigned depth)
- {
- if (erange.empty())
- return true;
-
- const int lx0 = (int) ceilf(erange.lower.x);
- const int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0));
- const int ly0 = (int) ceilf(erange.lower.y);
- const int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0));
- if (lx0 >= lx1 || ly0 >= ly1)
- return true;
-
- if (!This)
- return false;
-
- switch (This.type())
- {
- case Patch::BILINEAR_PATCH: {
- evalLocalGrid((Patch::BilinearPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
- return true;
- }
- case Patch::BSPLINE_PATCH: {
- evalLocalGrid((Patch::BSplinePatch*)This.object(),srange,lx0,lx1,ly0,ly1);
- return true;
- }
- case Patch::BEZIER_PATCH: {
- evalLocalGrid((Patch::BezierPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
- return true;
- }
- case Patch::GREGORY_PATCH: {
- evalLocalGrid((Patch::GregoryPatch*)This.object(),srange,lx0,lx1,ly0,ly1);
- return true;
- }
- case Patch::SUBDIVIDED_QUAD_PATCH:
- {
- const Vec2f c = srange.center();
- const BBox2f srange0(srange.lower,c);
- const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
- const BBox2f srange2(c,srange.upper);
- const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
-
- Patch::SubdividedQuadPatch* patch = (Patch::SubdividedQuadPatch*)This.object();
- eval(patch->child[0],srange0,intersect(srange0,erange),depth+1);
- eval(patch->child[1],srange1,intersect(srange1,erange),depth+1);
- eval(patch->child[2],srange2,intersect(srange2,erange),depth+1);
- eval(patch->child[3],srange3,intersect(srange3,erange),depth+1);
- return true;
- }
- case Patch::EVAL_PATCH: {
- CatmullClarkPatch patch; patch.deserialize(This.object());
- FeatureAdaptiveEvalGrid(patch,srange,erange,depth,x0,x1,y0,y1,swidth,sheight,Px,Py,Pz,U,V,Nx,Ny,Nz,dwidth,dheight);
- count += (lx1-lx0)*(ly1-ly0);
- return true;
- }
- default:
- assert(false);
- return false;
- }
- }
-
- bool eval(Ref This, unsigned subPatch, const BBox2f& srange, const BBox2f& erange)
- {
- if (!This)
- return false;
-
- switch (This.type())
- {
- case Patch::SUBDIVIDED_GENERAL_PATCH: {
- Patch::SubdividedGeneralPatch* patch = (Patch::SubdividedGeneralPatch*)This.object();
- assert(subPatch < patch->N);
- return eval(patch->child[subPatch],srange,erange,1);
- }
- default:
- assert(subPatch == 0);
- return eval(This,srange,erange,0);
- }
- }
- };
-
- __forceinline unsigned patch_eval_subdivision_count (const HalfEdge* h)
- {
- const unsigned N = h->numEdges();
- if (N == 4) return 1;
- else return N;
- }
-
- template<typename Tessellator>
- inline void patch_eval_subdivision (const HalfEdge* h, Tessellator tessellator)
- {
- const unsigned N = h->numEdges();
- int neighborSubdiv[GeneralCatmullClarkPatch3fa::SIZE]; // FIXME: use array_t
- float levels[GeneralCatmullClarkPatch3fa::SIZE];
- for (unsigned i=0; i<N; i++) {
- assert(i<GeneralCatmullClarkPatch3fa::SIZE);
- neighborSubdiv[i] = h->hasOpposite() ? h->opposite()->numEdges() != 4 : 0;
- levels[i] = h->edge_level;
- h = h->next();
- }
- if (N == 4)
- {
- const Vec2f uv[4] = { Vec2f(0.0f,0.0f), Vec2f(1.0f,0.0f), Vec2f(1.0f,1.0f), Vec2f(0.0f,1.0f) };
- tessellator(uv,neighborSubdiv,levels,0);
- }
- else
- {
- for (unsigned i=0; i<N; i++)
- {
- assert(i<MAX_PATCH_VALENCE);
- static_assert(MAX_PATCH_VALENCE <= 16, "MAX_PATCH_VALENCE > 16");
- const int h = (i >> 2) & 3, l = i & 3;
- const Vec2f subPatchID((float)l,(float)h);
- const Vec2f uv[4] = { 2.0f*subPatchID + (0.5f+Vec2f(0.0f,0.0f)),
- 2.0f*subPatchID + (0.5f+Vec2f(1.0f,0.0f)),
- 2.0f*subPatchID + (0.5f+Vec2f(1.0f,1.0f)),
- 2.0f*subPatchID + (0.5f+Vec2f(0.0f,1.0f)) };
- const int neighborSubdiv1[4] = { 0,0,0,0 };
- const float levels1[4] = { 0.5f*levels[(i+0)%N], 0.5f*levels[(i+0)%N], 0.5f*levels[(i+N-1)%N], 0.5f*levels[(i+N-1)%N] };
- tessellator(uv,neighborSubdiv1,levels1,i);
- }
- }
- }
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h b/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h
deleted file mode 100644
index 28016d9e20..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "patch.h"
-#include "feature_adaptive_eval_simd.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename vbool, typename vint, typename vfloat, typename Vertex, typename Vertex_t = Vertex>
- struct PatchEvalSimd
- {
- public:
-
- typedef PatchT<Vertex,Vertex_t> Patch;
- typedef typename Patch::Ref Ref;
- typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
-
- PatchEvalSimd (SharedLazyTessellationCache::CacheEntry& entry, size_t commitCounter,
- const HalfEdge* edge, const char* vertices, size_t stride, const vbool& valid0, const vfloat& u, const vfloat& v,
- float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, const size_t dstride, const size_t N)
- : P(P), dPdu(dPdu), dPdv(dPdv), ddPdudu(ddPdudu), ddPdvdv(ddPdvdv), ddPdudv(ddPdudv), dstride(dstride), N(N)
- {
- /* conservative time for the very first allocation */
- auto time = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
-
- Ref patch = SharedLazyTessellationCache::lookup(entry,commitCounter,[&] () {
- auto alloc = [](size_t bytes) { return SharedLazyTessellationCache::malloc(bytes); };
- return Patch::create(alloc,edge,vertices,stride);
- }, true);
-
- auto curTime = SharedLazyTessellationCache::sharedLazyTessellationCache.getTime(commitCounter);
- const bool allAllocationsValid = SharedLazyTessellationCache::validTime(time,curTime);
-
- patch = allAllocationsValid ? patch : nullptr;
-
- /* use cached data structure for calculations */
- const vbool valid1 = patch ? eval(valid0,patch,u,v,1.0f,0) : vbool(false);
- SharedLazyTessellationCache::unlock();
- const vbool valid2 = valid0 & !valid1;
- if (any(valid2)) {
- FeatureAdaptiveEvalSimd<vbool,vint,vfloat,Vertex,Vertex_t>(edge,vertices,stride,valid2,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dstride,N);
- }
- }
-
- vbool eval_quad(const vbool& valid, const typename Patch::SubdividedQuadPatch* This, const vfloat& u, const vfloat& v, const float dscale, const size_t depth)
- {
- vbool ret = false;
- const vbool u0_mask = u < 0.5f, u1_mask = u >= 0.5f;
- const vbool v0_mask = v < 0.5f, v1_mask = v >= 0.5f;
- const vbool u0v0_mask = valid & u0_mask & v0_mask;
- const vbool u0v1_mask = valid & u0_mask & v1_mask;
- const vbool u1v0_mask = valid & u1_mask & v0_mask;
- const vbool u1v1_mask = valid & u1_mask & v1_mask;
- if (any(u0v0_mask)) ret |= eval(u0v0_mask,This->child[0],2.0f*u,2.0f*v,2.0f*dscale,depth+1);
- if (any(u1v0_mask)) ret |= eval(u1v0_mask,This->child[1],2.0f*u-1.0f,2.0f*v,2.0f*dscale,depth+1);
- if (any(u1v1_mask)) ret |= eval(u1v1_mask,This->child[2],2.0f*u-1.0f,2.0f*v-1.0f,2.0f*dscale,depth+1);
- if (any(u0v1_mask)) ret |= eval(u0v1_mask,This->child[3],2.0f*u,2.0f*v-1.0f,2.0f*dscale,depth+1);
- return ret;
- }
-
- vbool eval_general(const vbool& valid, const typename Patch::SubdividedGeneralPatch* patch, const vfloat& U, const vfloat& V, const size_t depth)
- {
- vbool ret = false;
- const vint l = (vint)floor(0.5f*U); const vfloat u = 2.0f*frac(0.5f*U)-0.5f;
- const vint h = (vint)floor(0.5f*V); const vfloat v = 2.0f*frac(0.5f*V)-0.5f;
- const vint i = (h<<2)+l; assert(all(valid,i<patch->N));
- foreach_unique(valid,i,[&](const vbool& valid, const int i) {
- ret |= eval(valid,patch->child[i],u,v,1.0f,depth+1);
- });
- return ret;
- }
-
- vbool eval(const vbool& valid, Ref This, const vfloat& u, const vfloat& v, const float dscale, const size_t depth)
- {
- if (!This) return false;
- switch (This.type())
- {
- case Patch::BILINEAR_PATCH: {
- ((typename Patch::BilinearPatch*)This.object())->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- return valid;
- }
- case Patch::BSPLINE_PATCH: {
- ((typename Patch::BSplinePatch*)This.object())->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- return valid;
- }
- case Patch::BEZIER_PATCH: {
- ((typename Patch::BezierPatch*)This.object())->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- return valid;
- }
- case Patch::GREGORY_PATCH: {
- ((typename Patch::GregoryPatch*)This.object())->patch.eval(valid,u,v,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
- return valid;
- }
- case Patch::SUBDIVIDED_QUAD_PATCH: {
- return eval_quad(valid,((typename Patch::SubdividedQuadPatch*)This.object()),u,v,dscale,depth);
- }
- case Patch::SUBDIVIDED_GENERAL_PATCH: {
- assert(dscale == 1.0f);
- return eval_general(valid,((typename Patch::SubdividedGeneralPatch*)This.object()),u,v,depth);
- }
- case Patch::EVAL_PATCH: {
- CatmullClarkPatch patch; patch.deserialize(This.object());
- FeatureAdaptiveEvalSimd<vbool,vint,vfloat,Vertex,Vertex_t>(patch,valid,u,v,dscale,depth,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dstride,N);
- return valid;
- }
- default:
- assert(false);
- return false;
- }
- }
-
- private:
- float* const P;
- float* const dPdu;
- float* const dPdv;
- float* const ddPdudu;
- float* const ddPdvdv;
- float* const ddPdudv;
- const size_t dstride;
- const size_t N;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h b/thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h
deleted file mode 100644
index d5bc403cca..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../geometry/primitive.h"
-#include "bspline_patch.h"
-#include "bezier_patch.h"
-#include "gregory_patch.h"
-#include "gregory_patch_dense.h"
-#include "tessellation.h"
-#include "tessellation_cache.h"
-#include "gridrange.h"
-#include "patch_eval_grid.h"
-#include "feature_adaptive_eval_grid.h"
-#include "../common/scene_subdiv_mesh.h"
-
-namespace embree
-{
- struct __aligned(64) SubdivPatch1Base
- {
- public:
-
- enum Type {
- INVALID_PATCH = 0,
- BSPLINE_PATCH = 1,
- BEZIER_PATCH = 2,
- GREGORY_PATCH = 3,
- EVAL_PATCH = 5,
- BILINEAR_PATCH = 6,
- };
-
- enum Flags {
- TRANSITION_PATCH = 16,
- };
-
- /*! Default constructor. */
- __forceinline SubdivPatch1Base () {}
-
- SubdivPatch1Base (const unsigned int gID,
- const unsigned int pID,
- const unsigned int subPatch,
- const SubdivMesh *const mesh,
- const size_t time,
- const Vec2f uv[4],
- const float edge_level[4],
- const int subdiv[4],
- const int simd_width);
-
- __forceinline bool needsStitching() const {
- return flags & TRANSITION_PATCH;
- }
-
- __forceinline Vec2f getUV(const size_t i) const {
- return Vec2f((float)u[i],(float)v[i]) * (8.0f/0x10000);
- }
-
- static void computeEdgeLevels(const float edge_level[4], const int subdiv[4], float level[4]);
- static Vec2i computeGridSize(const float level[4]);
- bool updateEdgeLevels(const float edge_level[4], const int subdiv[4], const SubdivMesh *const mesh, const int simd_width);
-
- public:
-
- __forceinline size_t getGridBytes() const {
- const size_t grid_size_xyzuv = (grid_size_simd_blocks * VSIZEX) * 4;
- return 64*((grid_size_xyzuv+15) / 16);
- }
-
- __forceinline void write_lock() { mtx.lock(); }
- __forceinline void write_unlock() { mtx.unlock(); }
- __forceinline bool try_write_lock() { return mtx.try_lock(); }
- //__forceinline bool try_read_lock() { return mtx.try_read_lock(); }
-
- __forceinline void resetRootRef() {
- //assert( mtx.hasInitialState() );
- root_ref = SharedLazyTessellationCache::Tag();
- }
-
- __forceinline SharedLazyTessellationCache::CacheEntry& entry() {
- return (SharedLazyTessellationCache::CacheEntry&) root_ref;
- }
-
- public:
- __forceinline unsigned int geomID() const {
- return geom;
- }
-
- __forceinline unsigned int primID() const {
- return prim;
- }
-
- public:
- SharedLazyTessellationCache::Tag root_ref;
- SpinLock mtx;
-
- unsigned short u[4]; //!< 16bit discretized u,v coordinates
- unsigned short v[4];
- float level[4];
-
- unsigned char flags;
- unsigned char type;
- unsigned short grid_u_res;
- unsigned int geom; //!< geometry ID of the subdivision mesh this patch belongs to
- unsigned int prim; //!< primitive ID of this subdivision patch
- unsigned short grid_v_res;
-
- unsigned short grid_size_simd_blocks;
- unsigned int time_;
-
- struct PatchHalfEdge {
- const HalfEdge* edge;
- unsigned subPatch;
- };
-
- Vec3fa patch_v[4][4];
-
- const HalfEdge *edge() const { return ((PatchHalfEdge*)patch_v)->edge; }
- unsigned time() const { return time_; }
- unsigned subPatch() const { return ((PatchHalfEdge*)patch_v)->subPatch; }
-
- void set_edge(const HalfEdge *h) const { ((PatchHalfEdge*)patch_v)->edge = h; }
- void set_subPatch(const unsigned s) const { ((PatchHalfEdge*)patch_v)->subPatch = s; }
- };
-
- namespace isa
- {
- Vec3fa patchEval(const SubdivPatch1Base& patch, const float uu, const float vv);
- Vec3fa patchNormal(const SubdivPatch1Base& patch, const float uu, const float vv);
-
- template<typename simdf>
- Vec3<simdf> patchEval(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv);
-
- template<typename simdf>
- Vec3<simdf> patchNormal(const SubdivPatch1Base& patch, const simdf& uu, const simdf& vv);
-
-
- /* eval grid over patch and stich edges when required */
- void evalGrid(const SubdivPatch1Base& patch,
- const unsigned x0, const unsigned x1,
- const unsigned y0, const unsigned y1,
- const unsigned swidth, const unsigned sheight,
- float *__restrict__ const grid_x,
- float *__restrict__ const grid_y,
- float *__restrict__ const grid_z,
- float *__restrict__ const grid_u,
- float *__restrict__ const grid_v,
- const SubdivMesh* const geom);
-
- /* eval grid over patch and stich edges when required */
- BBox3fa evalGridBounds(const SubdivPatch1Base& patch,
- const unsigned x0, const unsigned x1,
- const unsigned y0, const unsigned y1,
- const unsigned swidth, const unsigned sheight,
- const SubdivMesh* const geom);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/tessellation.h b/thirdparty/embree-aarch64/kernels/subdiv/tessellation.h
deleted file mode 100644
index bda1e2d559..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/tessellation.h
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-namespace embree
-{
- /* adjust discret tessellation level for feature-adaptive pre-subdivision */
- __forceinline float adjustTessellationLevel(float l, const size_t sublevel)
- {
- for (size_t i=0; i<sublevel; i++) l *= 0.5f;
- float r = ceilf(l);
- for (size_t i=0; i<sublevel; i++) r *= 2.0f;
- return r;
- }
-
- __forceinline int stitch(const int x, const int fine, const int coarse) {
- return (2*x+1)*coarse/(2*fine);
- }
-
- __forceinline void stitchGridEdges(const unsigned int low_rate,
- const unsigned int high_rate,
- const unsigned int x0,
- const unsigned int x1,
- float * __restrict__ const uv_array,
- const unsigned int uv_array_step)
- {
-#if 1
- const float inv_low_rate = rcp((float)(low_rate-1));
- for (unsigned x=x0; x<=x1; x++) {
- uv_array[(x-x0)*uv_array_step] = float(stitch(x,high_rate-1,low_rate-1))*inv_low_rate;
- }
- if (unlikely(x1 == high_rate-1))
- uv_array[(x1-x0)*uv_array_step] = 1.0f;
-#else
- assert(low_rate < high_rate);
- assert(high_rate >= 2);
-
- const float inv_low_rate = rcp((float)(low_rate-1));
- const unsigned int dy = low_rate - 1;
- const unsigned int dx = high_rate - 1;
-
- int p = 2*dy-dx;
-
- unsigned int offset = 0;
- unsigned int y = 0;
- float value = 0.0f;
- for(unsigned int x=0;x<high_rate-1; x++) // '<=' would be correct but we will leave the 1.0f at the end
- {
- uv_array[offset] = value;
-
- offset += uv_array_step;
- if (unlikely(p > 0))
- {
- y++;
- value = (float)y * inv_low_rate;
- p -= 2*dx;
- }
- p += 2*dy;
- }
-#endif
- }
-
- __forceinline void stitchUVGrid(const float edge_levels[4],
- const unsigned int swidth,
- const unsigned int sheight,
- const unsigned int x0,
- const unsigned int y0,
- const unsigned int grid_u_res,
- const unsigned int grid_v_res,
- float * __restrict__ const u_array,
- float * __restrict__ const v_array)
- {
- const unsigned int x1 = x0+grid_u_res-1;
- const unsigned int y1 = y0+grid_v_res-1;
- const unsigned int int_edge_points0 = (unsigned int)edge_levels[0] + 1;
- const unsigned int int_edge_points1 = (unsigned int)edge_levels[1] + 1;
- const unsigned int int_edge_points2 = (unsigned int)edge_levels[2] + 1;
- const unsigned int int_edge_points3 = (unsigned int)edge_levels[3] + 1;
-
- if (unlikely(y0 == 0 && int_edge_points0 < swidth))
- stitchGridEdges(int_edge_points0,swidth,x0,x1,u_array,1);
-
- if (unlikely(y1 == sheight-1 && int_edge_points2 < swidth))
- stitchGridEdges(int_edge_points2,swidth,x0,x1,&u_array[(grid_v_res-1)*grid_u_res],1);
-
- if (unlikely(x0 == 0 && int_edge_points1 < sheight))
- stitchGridEdges(int_edge_points1,sheight,y0,y1,&v_array[grid_u_res-1],grid_u_res);
-
- if (unlikely(x1 == swidth-1 && int_edge_points3 < sheight))
- stitchGridEdges(int_edge_points3,sheight,y0,y1,v_array,grid_u_res);
- }
-
- __forceinline void gridUVTessellator(const float edge_levels[4],
- const unsigned int swidth,
- const unsigned int sheight,
- const unsigned int x0,
- const unsigned int y0,
- const unsigned int grid_u_res,
- const unsigned int grid_v_res,
- float * __restrict__ const u_array,
- float * __restrict__ const v_array)
- {
- assert( grid_u_res >= 1);
- assert( grid_v_res >= 1);
- assert( edge_levels[0] >= 1.0f );
- assert( edge_levels[1] >= 1.0f );
- assert( edge_levels[2] >= 1.0f );
- assert( edge_levels[3] >= 1.0f );
-
-#if defined(__AVX__)
- const vint8 grid_u_segments = vint8(swidth)-1;
- const vint8 grid_v_segments = vint8(sheight)-1;
-
- const vfloat8 inv_grid_u_segments = rcp(vfloat8(grid_u_segments));
- const vfloat8 inv_grid_v_segments = rcp(vfloat8(grid_v_segments));
-
- unsigned int index = 0;
- vint8 v_i( zero );
- for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1)
- {
- vint8 u_i ( step );
-
- const vbool8 m_v = v_i < grid_v_segments;
-
- for (unsigned int x=0;x<grid_u_res;x+=8, u_i += 8)
- {
- const vbool8 m_u = u_i < grid_u_segments;
- const vfloat8 u = select(m_u, vfloat8(x0+u_i) * inv_grid_u_segments, 1.0f);
- const vfloat8 v = select(m_v, vfloat8(y0+v_i) * inv_grid_v_segments, 1.0f);
- vfloat8::storeu(&u_array[index + x],u);
- vfloat8::storeu(&v_array[index + x],v);
- }
- }
- #else
- const vint4 grid_u_segments = vint4(swidth)-1;
- const vint4 grid_v_segments = vint4(sheight)-1;
-
- const vfloat4 inv_grid_u_segments = rcp(vfloat4(grid_u_segments));
- const vfloat4 inv_grid_v_segments = rcp(vfloat4(grid_v_segments));
-
- unsigned int index = 0;
- vint4 v_i( zero );
- for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1)
- {
- vint4 u_i ( step );
-
- const vbool4 m_v = v_i < grid_v_segments;
-
- for (unsigned int x=0;x<grid_u_res;x+=4, u_i += 4)
- {
- const vbool4 m_u = u_i < grid_u_segments;
- const vfloat4 u = select(m_u, vfloat4(x0+u_i) * inv_grid_u_segments, 1.0f);
- const vfloat4 v = select(m_v, vfloat4(y0+v_i) * inv_grid_v_segments, 1.0f);
- vfloat4::storeu(&u_array[index + x],u);
- vfloat4::storeu(&v_array[index + x],v);
- }
- }
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h b/thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h
deleted file mode 100644
index 5c215288b6..0000000000
--- a/thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h
+++ /dev/null
@@ -1,325 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-
-/* force a complete cache invalidation when running out of allocation space */
-#define FORCE_SIMPLE_FLUSH 0
-
-#define THREAD_BLOCK_ATOMIC_ADD 4
-
-#if defined(DEBUG)
-#define CACHE_STATS(x)
-#else
-#define CACHE_STATS(x)
-#endif
-
-namespace embree
-{
- class SharedTessellationCacheStats
- {
- public:
- /* stats */
- static std::atomic<size_t> cache_accesses;
- static std::atomic<size_t> cache_hits;
- static std::atomic<size_t> cache_misses;
- static std::atomic<size_t> cache_flushes;
- static size_t cache_num_patches;
- __aligned(64) static SpinLock mtx;
-
- /* print stats for debugging */
- static void printStats();
- static void clearStats();
- };
-
- void resizeTessellationCache(size_t new_size);
- void resetTessellationCache();
-
- ////////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////////
-
- struct __aligned(64) ThreadWorkState
- {
- ALIGNED_STRUCT_(64);
-
- std::atomic<size_t> counter;
- ThreadWorkState* next;
- bool allocated;
-
- __forceinline ThreadWorkState(bool allocated = false)
- : counter(0), next(nullptr), allocated(allocated)
- {
- assert( ((size_t)this % 64) == 0 );
- }
- };
-
- class __aligned(64) SharedLazyTessellationCache
- {
- public:
-
- static const size_t NUM_CACHE_SEGMENTS = 8;
- static const size_t NUM_PREALLOC_THREAD_WORK_STATES = 512;
- static const size_t COMMIT_INDEX_SHIFT = 32+8;
-#if defined(__X86_64__) || defined(__aarch64__)
- static const size_t REF_TAG_MASK = 0xffffffffff;
-#else
- static const size_t REF_TAG_MASK = 0x7FFFFFFF;
-#endif
- static const size_t MAX_TESSELLATION_CACHE_SIZE = REF_TAG_MASK+1;
- static const size_t BLOCK_SIZE = 64;
-
-
- /*! Per thread tessellation ref cache */
- static __thread ThreadWorkState* init_t_state;
- static ThreadWorkState* current_t_state;
-
- static __forceinline ThreadWorkState *threadState()
- {
- if (unlikely(!init_t_state))
- /* sets init_t_state, can't return pointer due to macosx icc bug*/
- SharedLazyTessellationCache::sharedLazyTessellationCache.getNextRenderThreadWorkState();
- return init_t_state;
- }
-
- struct Tag
- {
- __forceinline Tag() : data(0) {}
-
- __forceinline Tag(void* ptr, size_t combinedTime) {
- init(ptr,combinedTime);
- }
-
- __forceinline Tag(size_t ptr, size_t combinedTime) {
- init((void*)ptr,combinedTime);
- }
-
- __forceinline void init(void* ptr, size_t combinedTime)
- {
- if (ptr == nullptr) {
- data = 0;
- return;
- }
- int64_t new_root_ref = (int64_t) ptr;
- new_root_ref -= (int64_t)SharedLazyTessellationCache::sharedLazyTessellationCache.getDataPtr();
- assert( new_root_ref <= (int64_t)REF_TAG_MASK );
- new_root_ref |= (int64_t)combinedTime << COMMIT_INDEX_SHIFT;
- data = new_root_ref;
- }
-
- __forceinline int64_t get() const { return data.load(); }
- __forceinline void set( int64_t v ) { data.store(v); }
- __forceinline void reset() { data.store(0); }
-
- private:
- atomic<int64_t> data;
- };
-
- static __forceinline size_t extractCommitIndex(const int64_t v) { return v >> SharedLazyTessellationCache::COMMIT_INDEX_SHIFT; }
-
- struct CacheEntry
- {
- Tag tag;
- SpinLock mutex;
- };
-
- private:
-
- float *data;
- bool hugepages;
- size_t size;
- size_t maxBlocks;
- ThreadWorkState *threadWorkState;
-
- __aligned(64) std::atomic<size_t> localTime;
- __aligned(64) std::atomic<size_t> next_block;
- __aligned(64) SpinLock reset_state;
- __aligned(64) SpinLock linkedlist_mtx;
- __aligned(64) std::atomic<size_t> switch_block_threshold;
- __aligned(64) std::atomic<size_t> numRenderThreads;
-
-
- public:
-
-
- SharedLazyTessellationCache();
- ~SharedLazyTessellationCache();
-
- void getNextRenderThreadWorkState();
-
- __forceinline size_t maxAllocSize() const {
- return switch_block_threshold;
- }
-
- __forceinline size_t getCurrentIndex() { return localTime.load(); }
- __forceinline void addCurrentIndex(const size_t i=1) { localTime.fetch_add(i); }
-
- __forceinline size_t getTime(const size_t globalTime) {
- return localTime.load()+NUM_CACHE_SEGMENTS*globalTime;
- }
-
-
- __forceinline size_t lockThread (ThreadWorkState *const t_state, const ssize_t plus=1) { return t_state->counter.fetch_add(plus); }
- __forceinline size_t unlockThread(ThreadWorkState *const t_state, const ssize_t plus=-1) { assert(isLocked(t_state)); return t_state->counter.fetch_add(plus); }
-
- __forceinline bool isLocked(ThreadWorkState *const t_state) { return t_state->counter.load() != 0; }
-
- static __forceinline void lock () { sharedLazyTessellationCache.lockThread(threadState()); }
- static __forceinline void unlock() { sharedLazyTessellationCache.unlockThread(threadState()); }
- static __forceinline bool isLocked() { return sharedLazyTessellationCache.isLocked(threadState()); }
- static __forceinline size_t getState() { return threadState()->counter.load(); }
- static __forceinline void lockThreadLoop() { sharedLazyTessellationCache.lockThreadLoop(threadState()); }
-
- static __forceinline size_t getTCacheTime(const size_t globalTime) {
- return sharedLazyTessellationCache.getTime(globalTime);
- }
-
- /* per thread lock */
- __forceinline void lockThreadLoop (ThreadWorkState *const t_state)
- {
- while(1)
- {
- size_t lock = SharedLazyTessellationCache::sharedLazyTessellationCache.lockThread(t_state,1);
- if (unlikely(lock >= THREAD_BLOCK_ATOMIC_ADD))
- {
- /* lock failed wait until sync phase is over */
- sharedLazyTessellationCache.unlockThread(t_state,-1);
- sharedLazyTessellationCache.waitForUsersLessEqual(t_state,0);
- }
- else
- break;
- }
- }
-
- static __forceinline void* lookup(CacheEntry& entry, size_t globalTime)
- {
- const int64_t subdiv_patch_root_ref = entry.tag.get();
- CACHE_STATS(SharedTessellationCacheStats::cache_accesses++);
-
- if (likely(subdiv_patch_root_ref != 0))
- {
- const size_t subdiv_patch_root = (subdiv_patch_root_ref & REF_TAG_MASK) + (size_t)sharedLazyTessellationCache.getDataPtr();
- const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
-
- if (likely( sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime) ))
- {
- CACHE_STATS(SharedTessellationCacheStats::cache_hits++);
- return (void*) subdiv_patch_root;
- }
- }
- CACHE_STATS(SharedTessellationCacheStats::cache_misses++);
- return nullptr;
- }
-
- template<typename Constructor>
- static __forceinline auto lookup (CacheEntry& entry, size_t globalTime, const Constructor constructor, const bool before=false) -> decltype(constructor())
- {
- ThreadWorkState *t_state = SharedLazyTessellationCache::threadState();
-
- while (true)
- {
- sharedLazyTessellationCache.lockThreadLoop(t_state);
- void* patch = SharedLazyTessellationCache::lookup(entry,globalTime);
- if (patch) return (decltype(constructor())) patch;
-
- if (entry.mutex.try_lock())
- {
- if (!validTag(entry.tag,globalTime))
- {
- auto timeBefore = sharedLazyTessellationCache.getTime(globalTime);
- auto ret = constructor(); // thread is locked here!
- assert(ret);
- /* this should never return nullptr */
- auto timeAfter = sharedLazyTessellationCache.getTime(globalTime);
- auto time = before ? timeBefore : timeAfter;
- __memory_barrier();
- entry.tag = SharedLazyTessellationCache::Tag(ret,time);
- __memory_barrier();
- entry.mutex.unlock();
- return ret;
- }
- entry.mutex.unlock();
- }
- SharedLazyTessellationCache::sharedLazyTessellationCache.unlockThread(t_state);
- }
- }
-
- __forceinline bool validCacheIndex(const size_t i, const size_t globalTime)
- {
-#if FORCE_SIMPLE_FLUSH == 1
- return i == getTime(globalTime);
-#else
- return i+(NUM_CACHE_SEGMENTS-1) >= getTime(globalTime);
-#endif
- }
-
- static __forceinline bool validTime(const size_t oldtime, const size_t newTime)
- {
- return oldtime+(NUM_CACHE_SEGMENTS-1) >= newTime;
- }
-
-
- static __forceinline bool validTag(const Tag& tag, size_t globalTime)
- {
- const int64_t subdiv_patch_root_ref = tag.get();
- if (subdiv_patch_root_ref == 0) return false;
- const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
- return sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime);
- }
-
- void waitForUsersLessEqual(ThreadWorkState *const t_state,
- const unsigned int users);
-
- __forceinline size_t alloc(const size_t blocks)
- {
- if (unlikely(blocks >= switch_block_threshold))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"allocation exceeds size of tessellation cache segment");
-
- assert(blocks < switch_block_threshold);
- size_t index = next_block.fetch_add(blocks);
- if (unlikely(index + blocks >= switch_block_threshold)) return (size_t)-1;
- return index;
- }
-
- static __forceinline void* malloc(const size_t bytes)
- {
- size_t block_index = -1;
- ThreadWorkState *const t_state = threadState();
- while (true)
- {
- block_index = sharedLazyTessellationCache.alloc((bytes+BLOCK_SIZE-1)/BLOCK_SIZE);
- if (block_index == (size_t)-1)
- {
- sharedLazyTessellationCache.unlockThread(t_state);
- sharedLazyTessellationCache.allocNextSegment();
- sharedLazyTessellationCache.lockThread(t_state);
- continue;
- }
- break;
- }
- return sharedLazyTessellationCache.getBlockPtr(block_index);
- }
-
- __forceinline void *getBlockPtr(const size_t block_index)
- {
- assert(block_index < maxBlocks);
- assert(data);
- assert(block_index*16 <= size);
- return (void*)&data[block_index*16];
- }
-
- __forceinline void* getDataPtr() { return data; }
- __forceinline size_t getNumUsedBytes() { return next_block * BLOCK_SIZE; }
- __forceinline size_t getMaxBlocks() { return maxBlocks; }
- __forceinline size_t getSize() { return size; }
-
- void allocNextSegment();
- void realloc(const size_t newSize);
-
- void reset();
-
- static SharedLazyTessellationCache sharedLazyTessellationCache;
- };
-}