summary | refs | log | tree | commit | diff
path: root/thirdparty/embree-aarch64/kernels/common
diff options
context:
space:
mode:
author: jfons <joan.fonssanchez@gmail.com> 2021-05-20 12:49:33 +0200
committer: jfons <joan.fonssanchez@gmail.com> 2021-05-21 17:00:24 +0200
commit: 767e374dced69b45db0afb30ca2ccf0bbbeef672 (patch)
tree: a712cecc2c8cc2c6d6ecdc4a50020d423ddb4c0c /thirdparty/embree-aarch64/kernels/common
parent: 42b6602f1d4b108cecb94b94c0d2b645acaebd4f (diff)
Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/common')
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accel.h556
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelinstance.h41
-rw-r--r--thirdparty/embree-aarch64/kernels/common/acceln.cpp232
-rw-r--r--thirdparty/embree-aarch64/kernels/common/acceln.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelset.cpp17
-rw-r--r--thirdparty/embree-aarch64/kernels/common/accelset.h248
-rw-r--r--thirdparty/embree-aarch64/kernels/common/alloc.cpp82
-rw-r--r--thirdparty/embree-aarch64/kernels/common/alloc.h1006
-rw-r--r--thirdparty/embree-aarch64/kernels/common/buffer.h263
-rw-r--r--thirdparty/embree-aarch64/kernels/common/builder.h60
-rw-r--r--thirdparty/embree-aarch64/kernels/common/context.h131
-rw-r--r--thirdparty/embree-aarch64/kernels/common/default.h273
-rw-r--r--thirdparty/embree-aarch64/kernels/common/device.cpp567
-rw-r--r--thirdparty/embree-aarch64/kernels/common/device.h85
-rw-r--r--thirdparty/embree-aarch64/kernels/common/geometry.cpp259
-rw-r--r--thirdparty/embree-aarch64/kernels/common/geometry.h582
-rw-r--r--thirdparty/embree-aarch64/kernels/common/hit.h114
-rw-r--r--thirdparty/embree-aarch64/kernels/common/instance_stack.h199
-rw-r--r--thirdparty/embree-aarch64/kernels/common/isa.h271
-rw-r--r--thirdparty/embree-aarch64/kernels/common/motion_derivative.h325
-rw-r--r--thirdparty/embree-aarch64/kernels/common/point_query.h136
-rw-r--r--thirdparty/embree-aarch64/kernels/common/primref.h138
-rw-r--r--thirdparty/embree-aarch64/kernels/common/primref_mb.h262
-rw-r--r--thirdparty/embree-aarch64/kernels/common/profile.h159
-rw-r--r--thirdparty/embree-aarch64/kernels/common/ray.h1517
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore.cpp1799
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore.h142
-rw-r--r--thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp442
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene.cpp976
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene.h390
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_curves.h341
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h215
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_instance.h272
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_line_segments.h307
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_points.h282
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h277
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h326
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp243
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h264
-rw-r--r--thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h77
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stack_item.h125
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stat.cpp128
-rw-r--r--thirdparty/embree-aarch64/kernels/common/stat.h116
-rw-r--r--thirdparty/embree-aarch64/kernels/common/state.cpp543
-rw-r--r--thirdparty/embree-aarch64/kernels/common/state.h197
-rw-r--r--thirdparty/embree-aarch64/kernels/common/vector.h76
46 files changed, 0 insertions, 15110 deletions
diff --git a/thirdparty/embree-aarch64/kernels/common/accel.h b/thirdparty/embree-aarch64/kernels/common/accel.h
deleted file mode 100644
index c038d3cf21..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accel.h
+++ /dev/null
@@ -1,556 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "ray.h"
-#include "point_query.h"
-#include "context.h"
-
-namespace embree
-{
- class Scene;
-
- /*! Common base of all acceleration structure data: keeps the linear bounds
-  *  of the structure together with a tag naming its concrete kind. */
- class AccelData : public RefCount
- {
-   ALIGNED_CLASS_(16);
-
- public:
-   /*! tag identifying the concrete kind of acceleration structure */
-   enum Type {
-     TY_UNKNOWN        = 0,
-     TY_ACCELN         = 1,
-     TY_ACCEL_INSTANCE = 2,
-     TY_BVH4           = 3,
-     TY_BVH8           = 4
-   };
-
- public:
-   /*! starts with empty bounds and the given type tag */
-   AccelData (const Type accelType)
-     : bounds(empty), type(accelType) {}
-
-   /*! hook called when some geometry gets deleted; does nothing by default */
-   virtual void deleteGeometry(size_t geomID) {}
-
-   /*! clears the acceleration structure data (implemented by subclasses) */
-   virtual void clear() = 0;
-
-   /*! bounds as reported by bounds.bounds() */
-   __forceinline BBox3fa getBounds() const { return bounds.bounds(); }
-
-   /*! bounds interpolated at time t */
-   __forceinline BBox3fa getBounds(float t) const { return bounds.interpolate(t); }
-
-   /*! the stored linear bounds, returned by value */
-   __forceinline LBBox3fa getLinearBounds() const { return bounds; }
-
-   /*! the structure counts as empty while lower.x of bounds0 is +infinity */
-   __forceinline bool isEmpty() const { return float(pos_inf) == bounds.bounds0.lower.x; }
-
- public:
-   LBBox3fa bounds; // linear bounds
-   Type type;       // kind tag, see Type
- };
-
- /*! Base class for all intersectable and buildable acceleration structures. */
- class Accel : public AccelData
- {
- ALIGNED_CLASS_(16);
- public:
-
- struct Intersectors;
-
- /*! Type of collide function: takes the two acceleration structures as
-  *  untyped pointers plus the user callback and its user pointer */
- typedef void (*CollideFunc)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr);
-
- /*! Type of point query function; returns a bool */
- typedef bool(*PointQueryFunc)(Intersectors* This, /*!< this pointer to accel */
- PointQuery* query, /*!< point query for lookup */
- PointQueryContext* context); /*!< point query context */
-
- /*! Type of intersect function pointer for single rays. */
- typedef void (*IntersectFunc)(Intersectors* This, /*!< this pointer to accel */
- RTCRayHit& ray, /*!< ray to intersect */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of intersect function pointer for ray packets of size 4. */
- typedef void (*IntersectFunc4)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit4& ray, /*!< ray packet to intersect */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of intersect function pointer for ray packets of size 8. */
- typedef void (*IntersectFunc8)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit8& ray, /*!< ray packet to intersect */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of intersect function pointer for ray packets of size 16. */
- typedef void (*IntersectFunc16)(const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRayHit16& ray, /*!< ray packet to intersect */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of intersect function pointer for ray streams of N rays. */
- typedef void (*IntersectFuncN)(Intersectors* This, /*!< this pointer to accel */
- RTCRayHitN** ray, /*!< ray stream to intersect */
- const size_t N, /*!< number of rays in stream */
- IntersectContext* context /*!< intersection context */);
-
-
- /*! Type of occlusion function pointer for single rays. */
- typedef void (*OccludedFunc) (Intersectors* This, /*!< this pointer to accel */
- RTCRay& ray, /*!< ray to test occlusion */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of occlusion function pointer for ray packets of size 4. */
- typedef void (*OccludedFunc4) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay4& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of occlusion function pointer for ray packets of size 8. */
- typedef void (*OccludedFunc8) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay8& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of occlusion function pointer for ray packets of size 16. */
- typedef void (*OccludedFunc16) (const void* valid, /*!< pointer to valid mask */
- Intersectors* This, /*!< this pointer to accel */
- RTCRay16& ray, /*!< ray packet to test occlusion. */
- IntersectContext* context); /*!< intersection context */
-
- /*! Type of occlusion function pointer for ray streams of N rays. */
- typedef void (*OccludedFuncN)(Intersectors* This, /*!< this pointer to accel */
- RTCRayN** ray, /*!< ray stream to test occlusion */
- const size_t N, /*!< number of rays in stream */
- IntersectContext* context /*!< intersection context */);
- typedef void (*ErrorFunc) (); /*!< untyped placeholder; the Collider/Intersector error constructors cast it to the typed callbacks */
-
- /*! Collide callback together with its display name. */
- struct Collider
- {
-   /*! unnamed entry whose callback aliases the given error function */
-   Collider (ErrorFunc error = nullptr)
-     : collide(reinterpret_cast<CollideFunc>(error)),
-       name(nullptr) {}
-
-   /*! named entry wrapping a concrete collide callback */
-   Collider (CollideFunc collide, const char* name)
-     : collide(collide), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name != nullptr; }
-
- public:
-   CollideFunc collide;
-   const char* name;
- };
-
- /*! Single-ray intersect/occluded callbacks, an optional point query
-  *  callback, and the display name of the implementation. */
- struct Intersector1
- {
-   /*! Unnamed entry: intersect and occluded alias the error function.
-    *  pointQuery is explicitly nulled here; it used to be left
-    *  uninitialized, so the non-null assert guarding its use could pass
-    *  on an indeterminate pointer. */
-   Intersector1 (ErrorFunc error = nullptr)
-     : intersect((IntersectFunc)error), occluded((OccludedFunc)error), pointQuery(nullptr), name(nullptr) {}
-
-   /*! Named entry without point query support (pointQuery stays null). */
-   Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name)
-     : intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {}
-
-   /*! Named entry with point query support. */
-   Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name)
-     : intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name; }
-
- public:
-   static const char* type;
-   IntersectFunc intersect;
-   OccludedFunc occluded;
-   PointQueryFunc pointQuery;
-   const char* name;
- };
-
- /*! Intersect/occluded callback pair for ray packets of size 4. */
- struct Intersector4
- {
-   /*! unnamed entry; both callbacks alias the given error function */
-   Intersector4 (ErrorFunc error = nullptr)
-     : intersect(reinterpret_cast<IntersectFunc4>(error)),
-       occluded(reinterpret_cast<OccludedFunc4>(error)),
-       name(nullptr) {}
-
-   /*! named entry wrapping concrete callbacks */
-   Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name)
-     : intersect(intersect), occluded(occluded), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name != nullptr; }
-
- public:
-   static const char* type;
-   IntersectFunc4 intersect;
-   OccludedFunc4 occluded;
-   const char* name;
- };
-
- /*! Intersect/occluded callback pair for ray packets of size 8. */
- struct Intersector8
- {
-   /*! unnamed entry; both callbacks alias the given error function */
-   Intersector8 (ErrorFunc error = nullptr)
-     : intersect(reinterpret_cast<IntersectFunc8>(error)),
-       occluded(reinterpret_cast<OccludedFunc8>(error)),
-       name(nullptr) {}
-
-   /*! named entry wrapping concrete callbacks */
-   Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name)
-     : intersect(intersect), occluded(occluded), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name != nullptr; }
-
- public:
-   static const char* type;
-   IntersectFunc8 intersect;
-   OccludedFunc8 occluded;
-   const char* name;
- };
-
- /*! Intersect/occluded callback pair for ray packets of size 16. */
- struct Intersector16
- {
-   /*! unnamed entry; both callbacks alias the given error function */
-   Intersector16 (ErrorFunc error = nullptr)
-     : intersect(reinterpret_cast<IntersectFunc16>(error)),
-       occluded(reinterpret_cast<OccludedFunc16>(error)),
-       name(nullptr) {}
-
-   /*! named entry wrapping concrete callbacks */
-   Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name)
-     : intersect(intersect), occluded(occluded), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name != nullptr; }
-
- public:
-   static const char* type;
-   IntersectFunc16 intersect;
-   OccludedFunc16 occluded;
-   const char* name;
- };
-
- /*! Intersect/occluded callback pair for ray streams of N rays. */
- struct IntersectorN
- {
-   /*! unnamed entry; both callbacks alias the given error function */
-   IntersectorN (ErrorFunc error = nullptr)
-     : intersect(reinterpret_cast<IntersectFuncN>(error)),
-       occluded(reinterpret_cast<OccludedFuncN>(error)),
-       name(nullptr) {}
-
-   /*! named entry wrapping concrete callbacks */
-   IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
-     : intersect(intersect), occluded(occluded), name(name) {}
-
-   /*! true iff the entry carries a name */
-   operator bool() const { return name != nullptr; }
-
- public:
-   static const char* type;
-   IntersectFuncN intersect;
-   OccludedFuncN occluded;
-   const char* name;
- };
-
- struct Intersectors
- {
- Intersectors() // every listed slot is built from a null function pointer and therefore has no name
- : ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr), intersectorN(nullptr) {}
-
- Intersectors (ErrorFunc error) // listed slots route to the error function; the _filter/_nofilter variants are not listed and get default-constructed
- : ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error), intersectorN(error) {}
-
- /*! Writes one line per named collider/intersector to std::cout, each
-  *  line preceded by ident blanks; unnamed entries are skipped. */
- void print(size_t ident)
- {
-   auto emit = [ident] (const char* label, const char* entryName) {
-     if (!entryName) return;
-     for (size_t pad=0; pad<ident; pad++) std::cout << " ";
-     std::cout << label << entryName << std::endl;
-   };
-
-   emit("collider = ", collider.name);
-   emit("intersector1 = ", intersector1.name);
-   emit("intersector4 = ", intersector4.name);
-   emit("intersector8 = ", intersector8.name);
-   emit("intersector16 = ", intersector16.name);
-   emit("intersectorN = ", intersectorN.name);
- }
-
- /*! For every packet/stream width that has a named _filter variant,
-  *  makes the active intersector the _filter variant when filter is
-  *  true and the _nofilter variant otherwise. Widths without a named
-  *  _filter variant are left untouched. */
- void select(bool filter)
- {
-   if (intersector4_filter)
-     intersector4 = filter ? intersector4_filter : intersector4_nofilter;
-
-   if (intersector8_filter)
-     intersector8 = filter ? intersector8_filter : intersector8_nofilter;
-
-   if (intersector16_filter)
-     intersector16 = filter ? intersector16_filter : intersector16_nofilter;
-
-   if (intersectorN_filter)
-     intersectorN = filter ? intersectorN_filter : intersectorN_nofilter;
- }
- /*! Runs the point query through intersector1.pointQuery and returns its result. */
- __forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) {
- assert(intersector1.pointQuery);
- return intersector1.pointQuery(this,query,context);
- }
-
- /*! collides two scenes; the callback receives the intersectors.ptr of each scene */
- __forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) {
- assert(collider.collide);
- collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr);
- }
-
- /*! Intersects a single ray with the scene. */
- __forceinline void intersect (RTCRayHit& ray, IntersectContext* context) {
- assert(intersector1.intersect);
- intersector1.intersect(this,ray,context);
- }
-
- /*! Intersects a packet of 4 rays with the scene. */
- __forceinline void intersect4 (const void* valid, RTCRayHit4& ray, IntersectContext* context) {
- assert(intersector4.intersect);
- intersector4.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a packet of 8 rays with the scene. */
- __forceinline void intersect8 (const void* valid, RTCRayHit8& ray, IntersectContext* context) {
- assert(intersector8.intersect);
- intersector8.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a packet of 16 rays with the scene. */
- __forceinline void intersect16 (const void* valid, RTCRayHit16& ray, IntersectContext* context) {
- assert(intersector16.intersect);
- intersector16.intersect(valid,this,ray,context);
- }
-
- /*! Intersects a stream of N rays in SOA layout with the scene. */
- __forceinline void intersectN (RTCRayHitN** rayN, const size_t N, IntersectContext* context)
- {
- assert(intersectorN.intersect);
- intersectorN.intersect(this,rayN,N,context);
- }
-
-#if defined(__SSE__) || defined(__ARM_NEON) // packet-of-4 overload: turns the vbool4 mask into a vint<4> and forwards to intersect4
- __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) {
- const vint<4> mask = valid.mask32();
- intersect4(&mask,(RTCRayHit4&)ray,context);
- }
-#endif
-#if defined(__AVX__) // packet-of-8 overload, forwards to intersect8
- __forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, IntersectContext* context) {
- const vint<8> mask = valid.mask32();
- intersect8(&mask,(RTCRayHit8&)ray,context);
- }
-#endif
-#if defined(__AVX512F__) // packet-of-16 overload, forwards to intersect16
- __forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, IntersectContext* context) {
- const vint<16> mask = valid.mask32();
- intersect16(&mask,(RTCRayHit16&)ray,context);
- }
-#endif
-
- template<int K>
- __forceinline void intersectN (RayHitK<K>** rayN, const size_t N, IntersectContext* context)
- {
- intersectN((RTCRayHitN**)rayN,N,context); // reinterprets the stream as RTCRayHitN** and forwards
- }
-
- /*! Tests if single ray is occluded by the scene. */
- __forceinline void occluded (RTCRay& ray, IntersectContext* context) {
- assert(intersector1.occluded);
- intersector1.occluded(this,ray,context);
- }
-
- /*! Tests if a packet of 4 rays is occluded by the scene. */
- __forceinline void occluded4 (const void* valid, RTCRay4& ray, IntersectContext* context) {
- assert(intersector4.occluded);
- intersector4.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a packet of 8 rays is occluded by the scene. */
- __forceinline void occluded8 (const void* valid, RTCRay8& ray, IntersectContext* context) {
- assert(intersector8.occluded);
- intersector8.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a packet of 16 rays is occluded by the scene. */
- __forceinline void occluded16 (const void* valid, RTCRay16& ray, IntersectContext* context) {
- assert(intersector16.occluded);
- intersector16.occluded(valid,this,ray,context);
- }
-
- /*! Tests if a stream of N rays in SOA layout is occluded by the scene. */
- __forceinline void occludedN (RTCRayN** rayN, const size_t N, IntersectContext* context)
- {
- assert(intersectorN.occluded);
- intersectorN.occluded(this,rayN,N,context);
- }
-
-#if defined(__SSE__) || defined(__ARM_NEON) // packet-of-4 overload: turns the vbool4 mask into a vint<4> and forwards to occluded4
- __forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) {
- const vint<4> mask = valid.mask32();
- occluded4(&mask,(RTCRay4&)ray,context);
- }
-#endif
-#if defined(__AVX__) // packet-of-8 overload, forwards to occluded8
- __forceinline void occluded(const vbool8& valid, RayK<8>& ray, IntersectContext* context) {
- const vint<8> mask = valid.mask32();
- occluded8(&mask,(RTCRay8&)ray,context);
- }
-#endif
-#if defined(__AVX512F__) // packet-of-16 overload, forwards to occluded16
- __forceinline void occluded(const vbool16& valid, RayK<16>& ray, IntersectContext* context) {
- const vint<16> mask = valid.mask32();
- occluded16(&mask,(RTCRay16&)ray,context);
- }
-#endif
-
- template<int K>
- __forceinline void occludedN (RayK<K>** rayN, const size_t N, IntersectContext* context)
- {
- occludedN((RTCRayN**)rayN,N,context); // reinterprets the stream as RTCRayN** and forwards
- }
-
- /*! intersect overload for a ray without hit record: forwards to
-  *  occluded, i.e. it only tests if the single ray is occluded by the scene. */
- __forceinline void intersect(RTCRay& ray, IntersectContext* context) {
- occluded(ray, context);
- }
-
- /*! intersect overload for a packet of K rays without hit records:
-  *  forwards to occluded, i.e. it only tests the packet for occlusion. */
- template<int K>
- __forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, IntersectContext* context) {
- occluded(valid, ray, context);
- }
-
- /*! intersectN overload for a stream of rays without hit records:
-  *  forwards to occludedN, i.e. it only tests the stream for occlusion. */
- template<int K>
- __forceinline void intersectN(RayK<K>** rayN, const size_t N, IntersectContext* context) {
- occludedN(rayN, N, context);
- }
-
- public:
- AccelData* ptr;
- void* leafIntersector;
- Collider collider;
- Intersector1 intersector1;
- Intersector4 intersector4;
- Intersector4 intersector4_filter;
- Intersector4 intersector4_nofilter;
- Intersector8 intersector8;
- Intersector8 intersector8_filter;
- Intersector8 intersector8_nofilter;
- Intersector16 intersector16;
- Intersector16 intersector16_filter;
- Intersector16 intersector16_nofilter;
- IntersectorN intersectorN;
- IntersectorN intersectorN_filter;
- IntersectorN intersectorN_nofilter;
- };
-
- public:
-
- /*! Construction from a type tag only; intersectors is default-constructed */
- Accel (const AccelData::Type type)
- : AccelData(type) {}
-
- /*! Construction from a type tag and a set of intersectors, which is copied */
- Accel (const AccelData::Type type, const Intersectors& intersectors)
- : AccelData(type), intersectors(intersectors) {}
-
- /*! Virtual destructor */
- virtual ~Accel() {}
-
- /*! makes the acceleration structure immutable; does nothing by default */
- virtual void immutable () {}
-
- /*! build acceleration structure; must be implemented by subclasses */
- virtual void build () = 0;
-
- public:
- Intersectors intersectors;
- };
- /* DEFINE_COLLIDER: defines a function `symbol` that returns an Accel::Collider wrapping collider::collide, named TOSTRING(isa) "::" TOSTRING(symbol). NOTE(review): `isa` is presumably a macro supplied by the including translation unit - confirm. */
-#define DEFINE_COLLIDER(symbol,collider) \
- Accel::Collider symbol() { \
- return Accel::Collider((Accel::CollideFunc)collider::collide, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
- /* DEFINE_INTERSECTOR1: same pattern for single rays; wraps intersector::intersect, ::occluded and ::pointQuery. */
-#define DEFINE_INTERSECTOR1(symbol,intersector) \
- Accel::Intersector1 symbol() { \
- return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \
- (Accel::OccludedFunc )intersector::occluded, \
- (Accel::PointQueryFunc)intersector::pointQuery,\
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
- /* DEFINE_INTERSECTOR4: same pattern for ray packets of size 4 (intersect and occluded only). */
-#define DEFINE_INTERSECTOR4(symbol,intersector) \
- Accel::Intersector4 symbol() { \
- return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \
- (Accel::OccludedFunc4)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
- /* DEFINE_INTERSECTOR8: same pattern for ray packets of size 8. */
-#define DEFINE_INTERSECTOR8(symbol,intersector) \
- Accel::Intersector8 symbol() { \
- return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \
- (Accel::OccludedFunc8)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
- /* DEFINE_INTERSECTOR16: same pattern for ray packets of size 16. */
-#define DEFINE_INTERSECTOR16(symbol,intersector) \
- Accel::Intersector16 symbol() { \
- return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \
- (Accel::OccludedFunc16)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
- /* DEFINE_INTERSECTORN: same pattern for ray streams of N rays. */
-#define DEFINE_INTERSECTORN(symbol,intersector) \
- Accel::IntersectorN symbol() { \
- return Accel::IntersectorN((Accel::IntersectFuncN)intersector::intersect, \
- (Accel::OccludedFuncN)intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-
- /* ray stream filter interface: intersect entry points, one per stream
-  * flavour (AOS takes a base pointer plus stride, AOP an array of ray
-  * pointers, SOA a raw char buffer with stream count and offset, SOP an
-  * RTCRayHitNp) */
- typedef void (*intersectStreamAOS_func)(Scene* scene, RTCRayHit* _rayN, const size_t N, const size_t stride, IntersectContext* context);
- typedef void (*intersectStreamAOP_func)(Scene* scene, RTCRayHit** _rayN, const size_t N, IntersectContext* context);
- typedef void (*intersectStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context);
- typedef void (*intersectStreamSOP_func)(Scene* scene, const RTCRayHitNp* rayN, const size_t N, IntersectContext* context);
- /* matching occlusion entry points, operating on rays without hit records */
- typedef void (*occludedStreamAOS_func)(Scene* scene, RTCRay* _rayN, const size_t N, const size_t stride, IntersectContext* context);
- typedef void (*occludedStreamAOP_func)(Scene* scene, RTCRay** _rayN, const size_t N, IntersectContext* context);
- typedef void (*occludedStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context);
- typedef void (*occludedStreamSOP_func)(Scene* scene, const RTCRayNp* rayN, const size_t N, IntersectContext* context);
-
- /*! Table of the eight ray stream entry points: intersect and occluded,
-  *  each in an AOS, AOP, SOA and SOP flavour. */
- struct RayStreamFilterFuncs
- {
-   /*! empty table, every entry null */
-   RayStreamFilterFuncs()
-     : intersectAOS(nullptr), intersectAOP(nullptr), intersectSOA(nullptr), intersectSOP(nullptr),
-       occludedAOS(nullptr), occludedAOP(nullptr), occludedSOA(nullptr), occludedSOP(nullptr) {}
-
-   /*! table whose entries all alias one untyped function pointer */
-   RayStreamFilterFuncs(void (*ptr) ())
-     : intersectAOS(reinterpret_cast<intersectStreamAOS_func>(ptr)),
-       intersectAOP(reinterpret_cast<intersectStreamAOP_func>(ptr)),
-       intersectSOA(reinterpret_cast<intersectStreamSOA_func>(ptr)),
-       intersectSOP(reinterpret_cast<intersectStreamSOP_func>(ptr)),
-       occludedAOS(reinterpret_cast<occludedStreamAOS_func>(ptr)),
-       occludedAOP(reinterpret_cast<occludedStreamAOP_func>(ptr)),
-       occludedSOA(reinterpret_cast<occludedStreamSOA_func>(ptr)),
-       occludedSOP(reinterpret_cast<occludedStreamSOP_func>(ptr)) {}
-
-   /*! table filled from eight concrete entry points */
-   RayStreamFilterFuncs(intersectStreamAOS_func isectAOS, intersectStreamAOP_func isectAOP,
-                        intersectStreamSOA_func isectSOA, intersectStreamSOP_func isectSOP,
-                        occludedStreamAOS_func occlAOS, occludedStreamAOP_func occlAOP,
-                        occludedStreamSOA_func occlSOA, occludedStreamSOP_func occlSOP)
-     : intersectAOS(isectAOS), intersectAOP(isectAOP), intersectSOA(isectSOA), intersectSOP(isectSOP),
-       occludedAOS(occlAOS), occludedAOP(occlAOP), occludedSOA(occlSOA), occludedSOP(occlSOP) {}
-
- public:
-   intersectStreamAOS_func intersectAOS;
-   intersectStreamAOP_func intersectAOP;
-   intersectStreamSOA_func intersectSOA;
-   intersectStreamSOP_func intersectSOP;
-
-   occludedStreamAOS_func occludedAOS;
-   occludedStreamAOP_func occludedAOP;
-   occludedStreamSOA_func occludedSOA;
-   occludedStreamSOP_func occludedSOP;
- };
-
- /*! signature of a parameterless function returning a filled table */
- using RayStreamFilterFuncsType = RayStreamFilterFuncs (*)();
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelinstance.h b/thirdparty/embree-aarch64/kernels/common/accelinstance.h
deleted file mode 100644
index d74b96df3f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelinstance.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accel.h"
-#include "builder.h"
-
-namespace embree
-{
- /*! Accel that owns one acceleration data structure and, optionally, the
-  *  builder that fills it. Both are held through std::unique_ptr and are
-  *  released together with the instance. */
- class AccelInstance : public Accel
- {
- public:
-   /*! Takes ownership of accel and builder; intersectors is copied into
-    *  the Accel base. */
-   AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors)
-     : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {}
-
-   /*! Drops the builder; later build() calls only refresh the bounds. */
-   void immutable () override {
-     builder.reset(nullptr);
-   }
-
- public:
-   /*! Runs the builder (if still present) and copies the bounds of the
-    *  owned structure. A null accel is tolerated, matching the guards in
-    *  deleteGeometry() and clear(); the bounds are then left unchanged. */
-   void build () override {
-     if (builder) builder->build();
-     if (accel  ) bounds = accel->bounds;
-   }
-
-   /*! Forwards the deletion notice to the owned structure and the builder. */
-   void deleteGeometry(size_t geomID) override {
-     if (accel  ) accel->deleteGeometry(geomID);
-     if (builder) builder->deleteGeometry(geomID);
-   }
-
-   /*! Clears the owned structure and the builder. */
-   void clear() override {
-     if (accel  ) accel->clear();
-     if (builder) builder->clear();
-   }
-
- private:
-   std::unique_ptr<AccelData> accel;
-   std::unique_ptr<Builder> builder;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.cpp b/thirdparty/embree-aarch64/kernels/common/acceln.cpp
deleted file mode 100644
index aadb4a64ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/acceln.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "acceln.h"
-#include "ray.h"
-#include "../../include/embree3/rtcore_ray.h"
-#include "../../common/algorithms/parallel_for.h"
-
-namespace embree
-{
- AccelN::AccelN() // starts as an empty TY_ACCELN container; children are added through accels_add()
- : Accel(AccelData::TY_ACCELN), accels() {}
-
- /*! deletes every child acceleration structure held by the container */
- AccelN::~AccelN()
- {
-   for (Accel* child : accels)
-     delete child;
- }
-
- void AccelN::accels_add(Accel* accel) // appends a child; the destructor and accels_init() delete it later
- {
- assert(accel); // a null child is only caught where asserts are enabled
- accels.push_back(accel);
- }
-
- /*! deletes all children and leaves the container empty */
- void AccelN::accels_init()
- {
-   for (Accel* child : accels)
-     delete child;
-
-   accels.clear();
- }
-
- /*! runs the point query on every non-empty child, in order, and reports
-  *  whether any of them returned true */
- bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   bool changed = false;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     changed |= child->intersectors.pointQuery(query,context);
-   }
-   return changed;
- }
-
- /*! intersects the single ray with every non-empty child, in order */
- void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.intersect(ray,context);
-   }
- }
-
- /*! intersects the packet of 4 rays with every non-empty child, in order */
- void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.intersect4(valid,ray,context);
-   }
- }
-
- /*! intersects the packet of 8 rays with every non-empty child, in order */
- void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.intersect8(valid,ray,context);
-   }
- }
-
- /*! intersects the packet of 16 rays with every non-empty child, in order */
- void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.intersect16(valid,ray,context);
-   }
- }
-
- /*! intersects the stream of N rays with every non-empty child, in order */
- void AccelN::intersectN (Accel::Intersectors* This_in, RTCRayHitN** ray, const size_t N, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.intersectN(ray,N,context);
-   }
- }
-
- /*! tests the single ray against the non-empty children, in order, and
-  *  stops as soon as ray.tfar has become negative */
- void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.occluded(ray,context);
-     if (ray.tfar < 0.0f) return;
-   }
- }
-
- void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, IntersectContext* context)
- { // runs the packet-of-4 occlusion test on each non-empty child, in order
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded4(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON) // the early exit below is only compiled in for these ISAs
- vbool4 valid0 = asBool(((vint4*)valid)[0]); // lanes enabled by the caller's mask
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); // lanes whose tfar is still >= 0; NOTE(review): presumably a negative tfar marks an occluded ray, as in the single-ray variant - confirm
- if (unlikely(none(valid0 & hit0))) break; // no enabled lane with tfar >= 0 remains: skip the remaining children
-#endif
- }
- }
-
- void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, IntersectContext* context)
- { // runs the packet-of-8 occlusion test on each non-empty child, in order
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded8(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA (the 8 lanes are checked as two groups of 4)
- vbool4 valid0 = asBool(((vint4*)valid)[0]); // enabled lanes 0-3
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); // lanes 0-3 whose tfar is still >= 0
- vbool4 valid1 = asBool(((vint4*)valid)[1]); // enabled lanes 4-7
- vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero); // lanes 4-7 whose tfar is still >= 0
- if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break; // no enabled lane with tfar >= 0 remains: skip the remaining children
-#endif
- }
- }
-
- void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, IntersectContext* context)
- { // runs the packet-of-16 occlusion test on each non-empty child, in order
- AccelN* This = (AccelN*)This_in->ptr;
- for (size_t i=0; i<This->accels.size(); i++) {
- if (This->accels[i]->isEmpty()) continue;
- This->accels[i]->intersectors.occluded16(valid,ray,context);
-#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA (the 16 lanes are checked as four groups of 4)
- vbool4 valid0 = asBool(((vint4*)valid)[0]); // enabled lanes 0-3
- vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); // lanes 0-3 whose tfar is still >= 0
- vbool4 valid1 = asBool(((vint4*)valid)[1]); // enabled lanes 4-7
- vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero); // lanes 4-7 whose tfar is still >= 0
- vbool4 valid2 = asBool(((vint4*)valid)[2]); // enabled lanes 8-11
- vbool4 hit2 = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero); // lanes 8-11 whose tfar is still >= 0
- vbool4 valid3 = asBool(((vint4*)valid)[3]); // enabled lanes 12-15
- vbool4 hit3 = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero); // lanes 12-15 whose tfar is still >= 0
- if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break; // no enabled lane with tfar >= 0 remains: skip the remaining children
-#endif
- }
- }
-
- /*! tests the stream of N rays against every non-empty child, in order;
-  *  unlike the single-ray and packet variants there is no early exit */
- void AccelN::occludedN (Accel::Intersectors* This_in, RTCRayN** ray, const size_t N, IntersectContext* context)
- {
-   AccelN* self = (AccelN*)This_in->ptr;
-   for (Accel* child : self->accels) {
-     if (child->isEmpty()) continue;
-     child->intersectors.occludedN(ray,N,context);
-   }
- }
-
- /*! prints a header line "accels[i]" for each child, preceded by ident
-  *  blanks, followed by that child's intersectors at ident+2 */
- void AccelN::accels_print(size_t ident)
- {
-   size_t index = 0;
-   for (Accel* child : accels)
-   {
-     for (size_t pad=0; pad<ident; pad++) std::cout << " ";
-     std::cout << "accels[" << index++ << "]" << std::endl;
-     child->intersectors.print(ident+2);
-   }
- }
-
- /*! calls immutable() on every child */
- void AccelN::accels_immutable()
- {
-   for (Accel* child : accels)
-     child->immutable();
- }
-
- void AccelN::accels_build ()
- {
- /* reduce memory consumption */
- accels.shrink_to_fit();
-
- /* build all acceleration structures in parallel */
- parallel_for (accels.size(), [&] (size_t i) {
- accels[i]->build();
- });
-
- /* determine, per ray width, whether every child provides a named
-  * intersector; a single child without one clears the flag */
- bool valid1 = true;
- bool valid4 = true;
- bool valid8 = true;
- bool valid16 = true;
- for (size_t i=0; i<accels.size(); i++) {
- valid1 &= (bool) accels[i]->intersectors.intersector1;
- valid4 &= (bool) accels[i]->intersectors.intersector4;
- valid8 &= (bool) accels[i]->intersectors.intersector8;
- valid16 &= (bool) accels[i]->intersectors.intersector16;
- }
-
- if (accels.size() == 1) { // single child: expose its type, bounds and intersectors directly
- type = accels[0]->type; // FIXME: should just assign entire Accel
- bounds = accels[0]->bounds;
- intersectors = accels[0]->intersectors;
- }
- else
- { // zero or several children: route queries through the static AccelN dispatchers
- type = AccelData::TY_ACCELN;
- intersectors.ptr = this;
- intersectors.intersector1 = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr); // a null name marks the width as unavailable (operator bool tests the name)
- intersectors.intersector4 = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr);
- intersectors.intersector8 = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr);
- intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr);
- intersectors.intersectorN = IntersectorN(&intersectN,&occludedN,"AccelN::intersectorN"); // always named; no per-child check is made for the stream width
-
- /*! calculate bounds as the union of all child bounds */
- bounds = empty;
- for (size_t i=0; i<accels.size(); i++)
- bounds.extend(accels[i]->bounds);
- }
- }
-
- /*! forwards the filter/nofilter selection to the intersectors of every child */
- void AccelN::accels_select(bool filter)
- {
-   for (Accel* child : accels)
-     child->intersectors.select(filter);
- }
-
- /*! notifies every child about the deletion of geometry geomID */
- void AccelN::accels_deleteGeometry(size_t geomID)
- {
-   for (Accel* child : accels)
-     child->deleteGeometry(geomID);
- }
-
- /*! clears every child; the children themselves stay in the container */
- void AccelN::accels_clear()
- {
-   for (Accel* child : accels)
-     child->clear();
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.h b/thirdparty/embree-aarch64/kernels/common/acceln.h
deleted file mode 100644
index 2edd98f647..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/acceln.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accel.h"
-
-namespace embree
-{
- /*! merges N acceleration structures together, by processing them in order */
- class AccelN : public Accel
- {
- public:
- AccelN ();
- ~AccelN();
-
- public:
- void accels_add(Accel* accel);
- void accels_init();
-
- public:
- static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
-
- public:
- static void intersect (Accel::Intersectors* This, RTCRayHit& ray, IntersectContext* context);
- static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, IntersectContext* context);
- static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, IntersectContext* context);
- static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, IntersectContext* context);
- static void intersectN (Accel::Intersectors* This, RTCRayHitN** ray, const size_t N, IntersectContext* context);
-
- public:
- static void occluded (Accel::Intersectors* This, RTCRay& ray, IntersectContext* context);
- static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, IntersectContext* context);
- static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, IntersectContext* context);
- static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, IntersectContext* context);
- static void occludedN (Accel::Intersectors* This, RTCRayN** ray, const size_t N, IntersectContext* context);
-
- public:
- void accels_print(size_t ident);
- void accels_immutable();
- void accels_build ();
- void accels_select(bool filter);
- void accels_deleteGeometry(size_t geomID);
- void accels_clear ();
-
- public:
- std::vector<Accel*> accels;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.cpp b/thirdparty/embree-aarch64/kernels/common/accelset.cpp
deleted file mode 100644
index 79be1c4301..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelset.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "accelset.h"
-#include "scene.h"
-
-namespace embree
-{
- AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps)
- : Geometry(device,gtype,(unsigned int)numItems,(unsigned int)numTimeSteps), boundsFunc(nullptr) {}
-
- AccelSet::IntersectorN::IntersectorN (ErrorFunc error)
- : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {}
-
- AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
- : intersect(intersect), occluded(occluded), name(name) {}
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.h b/thirdparty/embree-aarch64/kernels/common/accelset.h
deleted file mode 100644
index 3774b2accb..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/accelset.h
+++ /dev/null
@@ -1,248 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "builder.h"
-#include "geometry.h"
-#include "ray.h"
-#include "hit.h"
-
-namespace embree
-{
- struct IntersectFunctionNArguments;
- struct OccludedFunctionNArguments;
-
- typedef void (*ReportIntersectionFunc) (IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
- typedef void (*ReportOcclusionFunc) (OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
-
- struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
- {
- IntersectContext* internal_context;
- Geometry* geometry;
- ReportIntersectionFunc report;
- };
-
- struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
- {
- IntersectContext* internal_context;
- Geometry* geometry;
- ReportOcclusionFunc report;
- };
-
- /*! Base class for set of acceleration structures. */
- class AccelSet : public Geometry
- {
- public:
- typedef RTCIntersectFunctionN IntersectFuncN;
- typedef RTCOccludedFunctionN OccludedFuncN;
- typedef void (*ErrorFunc) ();
-
- struct IntersectorN
- {
- IntersectorN (ErrorFunc error = nullptr) ;
- IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name);
-
- operator bool() const { return name; }
-
- public:
- static const char* type;
- IntersectFuncN intersect;
- OccludedFuncN occluded;
- const char* name;
- };
-
- public:
-
- /*! construction */
- AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps);
-
- /*! makes the acceleration structure immutable */
- virtual void immutable () {}
-
- /*! build accel */
- virtual void build () = 0;
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- if (!isvalid_non_empty(bounds(i,itime))) return false;
-
- return true;
- }
-
- /*! Calculates the bounds of an item */
- __forceinline BBox3fa bounds(size_t i, size_t itime = 0) const
- {
- BBox3fa box;
- assert(i < size());
- RTCBoundsFunctionArguments args;
- args.geometryUserPtr = userPtr;
- args.primID = (unsigned int)i;
- args.timeStep = (unsigned int)itime;
- args.bounds_o = (RTCBounds*)&box;
- boundsFunc(&args);
- return box;
- }
-
- /*! calculates the linear bounds of the i'th item at the itime'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const
- {
- BBox3fa box[2];
- assert(i < size());
- RTCBoundsFunctionArguments args;
- args.geometryUserPtr = userPtr;
- args.primID = (unsigned int)i;
- args.timeStep = (unsigned int)(itime+0);
- args.bounds_o = (RTCBounds*)&box[0];
- boundsFunc(&args);
- args.timeStep = (unsigned int)(itime+1);
- args.bounds_o = (RTCBounds*)&box[1];
- boundsFunc(&args);
- return LBBox3fa(box[0],box[1]);
- }
-
- /*! calculates the build bounds of the i'th item, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const BBox3fa b = bounds(i);
- if (bbox) *bbox = b;
- return isvalid_non_empty(b);
- }
-
- /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const LBBox3fa bounds = linearBounds(i,itime);
- bbox = bounds.bounds0; // use bounding box of first timestep to build BVH
- return isvalid_non_empty(bounds);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return numPrimitives;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return numPrimitives != otherVersion;
- }
-
- public:
-
- /*! Intersects a single ray with the scene. */
- __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.intersect);
-
- int mask = -1;
- IntersectFunctionNArguments args;
- args.valid = &mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.rayhit = (RTCRayHitN*)&ray;
- args.N = 1;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.intersect(&args);
- }
-
- /*! Tests if single ray is occluded by the scene. */
- __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.occluded);
-
- int mask = -1;
- OccludedFunctionNArguments args;
- args.valid = &mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.N = 1;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.occluded(&args);
- }
-
- /*! Intersects a packet of K rays with the scene. */
- template<int K>
- __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.intersect);
-
- vint<K> mask = valid.mask32();
- IntersectFunctionNArguments args;
- args.valid = (int*)&mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.rayhit = (RTCRayHitN*)&ray;
- args.N = K;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.intersect(&args);
- }
-
- /*! Tests if a packet of K rays is occluded by the scene. */
- template<int K>
- __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
- {
- assert(primID < size());
- assert(intersectorN.occluded);
-
- vint<K> mask = valid.mask32();
- OccludedFunctionNArguments args;
- args.valid = (int*)&mask;
- args.geometryUserPtr = userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.N = K;
- args.geomID = geomID;
- args.primID = primID;
- args.internal_context = context;
- args.geometry = this;
- args.report = report;
-
- intersectorN.occluded(&args);
- }
-
- public:
- RTCBoundsFunction boundsFunc;
- IntersectorN intersectorN;
- };
-
-#define DEFINE_SET_INTERSECTORN(symbol,intersector) \
- AccelSet::IntersectorN symbol() { \
- return AccelSet::IntersectorN(intersector::intersect, \
- intersector::occluded, \
- TOSTRING(isa) "::" TOSTRING(symbol)); \
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.cpp b/thirdparty/embree-aarch64/kernels/common/alloc.cpp
deleted file mode 100644
index 6fa406f03a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/alloc.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "alloc.h"
-#include "../../common/sys/thread.h"
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include "../../common/sys/barrier.h"
-#endif
-
-namespace embree
-{
- __thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr;
- SpinLock FastAllocator::s_thread_local_allocators_lock;
- std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators;
-
- struct fast_allocator_regression_test : public RegressionTest
- {
- BarrierSys barrier;
- std::atomic<size_t> numFailed;
- std::unique_ptr<FastAllocator> alloc;
-
- fast_allocator_regression_test()
- : RegressionTest("fast_allocator_regression_test"), numFailed(0)
- {
- registerRegressionTest(this);
- }
-
- static void thread_alloc(fast_allocator_regression_test* This)
- {
- FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator();
-
- size_t* ptrs[1000];
- for (size_t j=0; j<1000; j++)
- {
- This->barrier.wait();
- for (size_t i=0; i<1000; i++) {
- ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32));
- *ptrs[i] = size_t(threadalloc.talloc0) + i;
- }
- for (size_t i=0; i<1000; i++) {
- if (*ptrs[i] != size_t(threadalloc.talloc0) + i)
- This->numFailed++;
- }
- This->barrier.wait();
- }
- }
-
- bool run ()
- {
- alloc = make_unique(new FastAllocator(nullptr,false));
- numFailed.store(0);
-
- size_t numThreads = getNumberOfLogicalThreads();
- barrier.init(numThreads+1);
-
- /* create threads */
- std::vector<thread_t> threads;
- for (size_t i=0; i<numThreads; i++)
- threads.push_back(createThread((thread_func)thread_alloc,this));
-
- /* run test */
- for (size_t i=0; i<1000; i++)
- {
- alloc->reset();
- barrier.wait();
- barrier.wait();
- }
-
- /* destroy threads */
- for (size_t i=0; i<numThreads; i++)
- join(threads[i]);
-
- alloc = nullptr;
-
- return numFailed == 0;
- }
- };
-
- fast_allocator_regression_test fast_allocator_regression;
-}
-
-
diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.h b/thirdparty/embree-aarch64/kernels/common/alloc.h
deleted file mode 100644
index 488fa707ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/alloc.h
+++ /dev/null
@@ -1,1006 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "primref.h"
-
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include <mutex>
-#endif
-
-namespace embree
-{
- class FastAllocator
- {
- /*! maximum supported alignment */
- static const size_t maxAlignment = 64;
-
- /*! maximum allocation size */
-
- /* default settings */
- //static const size_t defaultBlockSize = 4096;
-#define maxAllocationSize size_t(2*1024*1024-maxAlignment)
-
- static const size_t MAX_THREAD_USED_BLOCK_SLOTS = 8;
-
- public:
-
- struct ThreadLocal2;
- enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE };
-
- /*! Per thread structure holding the current memory block. */
- struct __aligned(64) ThreadLocal
- {
- ALIGNED_CLASS_(64);
- public:
-
- /*! Constructor for usage with ThreadLocalData */
- __forceinline ThreadLocal (ThreadLocal2* parent)
- : parent(parent), ptr(nullptr), cur(0), end(0), allocBlockSize(0), bytesUsed(0), bytesWasted(0) {}
-
- /*! initialize allocator */
- void init(FastAllocator* alloc)
- {
- ptr = nullptr;
- cur = end = 0;
- bytesUsed = 0;
- bytesWasted = 0;
- allocBlockSize = 0;
- if (alloc) allocBlockSize = alloc->defaultBlockSize;
- }
-
- /* Allocate aligned memory from the threads memory block. */
- __forceinline void* malloc(FastAllocator* alloc, size_t bytes, size_t align = 16)
- {
- /* bind the thread local allocator to the proper FastAllocator*/
- parent->bind(alloc);
-
- assert(align <= maxAlignment);
- bytesUsed += bytes;
-
- /* try to allocate in local block */
- size_t ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* if allocation is too large allocate with parent allocator */
- if (4*bytes > allocBlockSize) {
- return alloc->malloc(bytes,maxAlignment,false);
- }
-
- /* get new partial block if allocation failed */
- size_t blockSize = allocBlockSize;
- ptr = (char*) alloc->malloc(blockSize,maxAlignment,true);
- bytesWasted += end-cur;
- cur = 0; end = blockSize;
-
- /* retry allocation */
- ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* get new full block if allocation failed */
- blockSize = allocBlockSize;
- ptr = (char*) alloc->malloc(blockSize,maxAlignment,false);
- bytesWasted += end-cur;
- cur = 0; end = blockSize;
-
- /* retry allocation */
- ofs = (align - cur) & (align-1);
- cur += bytes + ofs;
- if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; }
- cur -= bytes + ofs;
-
- /* should never happen as large allocations get handled specially above */
- assert(false);
- return nullptr;
- }
-
-
- /*! returns amount of used bytes */
- __forceinline size_t getUsedBytes() const { return bytesUsed; }
-
- /*! returns amount of free bytes */
- __forceinline size_t getFreeBytes() const { return end-cur; }
-
- /*! returns amount of wasted bytes */
- __forceinline size_t getWastedBytes() const { return bytesWasted; }
-
- private:
- ThreadLocal2* parent;
- char* ptr; //!< pointer to memory block
- size_t cur; //!< current location of the allocator
- size_t end; //!< end of the memory block
- size_t allocBlockSize; //!< block size for allocations
- size_t bytesUsed; //!< number of total bytes allocated
- size_t bytesWasted; //!< number of bytes wasted
- };
-
- /*! Two thread local structures. */
- struct __aligned(64) ThreadLocal2
- {
- ALIGNED_CLASS_(64);
- public:
-
- __forceinline ThreadLocal2()
- : alloc(nullptr), alloc0(this), alloc1(this) {}
-
- /*! bind to fast allocator */
- __forceinline void bind(FastAllocator* alloc_i)
- {
- assert(alloc_i);
- if (alloc.load() == alloc_i) return;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- //if (alloc.load() == alloc_i) return; // not required as only one thread calls bind
- if (alloc.load()) {
- alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
- alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
- alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes();
- }
- alloc0.init(alloc_i);
- alloc1.init(alloc_i);
- alloc.store(alloc_i);
- alloc_i->join(this);
- }
-
- /*! unbind to fast allocator */
- void unbind(FastAllocator* alloc_i)
- {
- assert(alloc_i);
- if (alloc.load() != alloc_i) return;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- if (alloc.load() != alloc_i) return; // required as a different thread calls unbind
- alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
- alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
- alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes();
- alloc0.init(nullptr);
- alloc1.init(nullptr);
- alloc.store(nullptr);
- }
-
- public:
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex mutex;
-#else
- SpinLock mutex; //!< required as unbind is called from other threads
-#endif
- std::atomic<FastAllocator*> alloc; //!< parent allocator
- ThreadLocal alloc0;
- ThreadLocal alloc1;
- };
-
- FastAllocator (Device* device, bool osAllocation)
- : device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0),
- growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC),
- primrefarray(device,0)
- {
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- assert(!slotMutex[i].isLocked());
- }
- }
-
- ~FastAllocator () {
- clear();
- }
-
- /*! returns the device attached to this allocator */
- Device* getDevice() {
- return device;
- }
-
- void share(mvector<PrimRef>& primrefarray_i) {
- primrefarray = std::move(primrefarray_i);
- }
-
- void unshare(mvector<PrimRef>& primrefarray_o)
- {
- reset(); // this removes blocks that are allocated inside the shared primref array
- primrefarray_o = std::move(primrefarray);
- }
-
- /*! returns first fast thread local allocator */
- __forceinline ThreadLocal* _threadLocal() {
- return &threadLocal2()->alloc0;
- }
-
- void setOSallocation(bool flag)
- {
- atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC;
- }
-
- private:
-
- /*! returns both fast thread local allocators */
- __forceinline ThreadLocal2* threadLocal2()
- {
- ThreadLocal2* alloc = thread_local_allocator2;
- if (alloc == nullptr) {
- thread_local_allocator2 = alloc = new ThreadLocal2;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(s_thread_local_allocators_lock);
-#else
- Lock<SpinLock> lock(s_thread_local_allocators_lock);
-#endif
- s_thread_local_allocators.push_back(make_unique(alloc));
- }
- return alloc;
- }
-
- public:
-
- __forceinline void join(ThreadLocal2* alloc)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(s_thread_local_allocators_lock);
-#else
- Lock<SpinLock> lock(thread_local_allocators_lock);
-#endif
- thread_local_allocators.push_back(alloc);
- }
-
- public:
-
- struct CachedAllocator
- {
- __forceinline CachedAllocator(void* ptr)
- : alloc(nullptr), talloc0(nullptr), talloc1(nullptr)
- {
- assert(ptr == nullptr);
- }
-
- __forceinline CachedAllocator(FastAllocator* alloc, ThreadLocal2* talloc)
- : alloc(alloc), talloc0(&talloc->alloc0), talloc1(alloc->use_single_mode ? &talloc->alloc0 : &talloc->alloc1) {}
-
- __forceinline operator bool () const {
- return alloc != nullptr;
- }
-
- __forceinline void* operator() (size_t bytes, size_t align = 16) const {
- return talloc0->malloc(alloc,bytes,align);
- }
-
- __forceinline void* malloc0 (size_t bytes, size_t align = 16) const {
- return talloc0->malloc(alloc,bytes,align);
- }
-
- __forceinline void* malloc1 (size_t bytes, size_t align = 16) const {
- return talloc1->malloc(alloc,bytes,align);
- }
-
- public:
- FastAllocator* alloc;
- ThreadLocal* talloc0;
- ThreadLocal* talloc1;
- };
-
- __forceinline CachedAllocator getCachedAllocator() {
- return CachedAllocator(this,threadLocal2());
- }
-
- /*! Builder interface to create thread local allocator */
- struct Create
- {
- public:
- __forceinline Create (FastAllocator* allocator) : allocator(allocator) {}
- __forceinline CachedAllocator operator() () const { return allocator->getCachedAllocator(); }
-
- private:
- FastAllocator* allocator;
- };
-
- void internal_fix_used_blocks()
- {
- /* move thread local blocks to global block list */
- for (size_t i = 0; i < MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- while (threadBlocks[i].load() != nullptr) {
- Block* nextUsedBlock = threadBlocks[i].load()->next;
- threadBlocks[i].load()->next = usedBlocks.load();
- usedBlocks = threadBlocks[i].load();
- threadBlocks[i] = nextUsedBlock;
- }
- threadBlocks[i] = nullptr;
- }
- }
-
- static const size_t threadLocalAllocOverhead = 20; //! 20 means 5% parallel allocation overhead through unfilled thread local blocks
-#if defined(__AVX512ER__) // KNL
- static const size_t mainAllocOverheadStatic = 15; //! 15 means 7.5% allocation overhead through unfilled main alloc blocks
-#else
- static const size_t mainAllocOverheadStatic = 20; //! 20 means 5% allocation overhead through unfilled main alloc blocks
-#endif
- static const size_t mainAllocOverheadDynamic = 8; //! 20 means 12.5% allocation overhead through unfilled main alloc blocks
-
- /* calculates a single threaded threshold for the builders such
- * that for small scenes the overhead of partly allocated blocks
- * per thread is low */
- size_t fixSingleThreadThreshold(size_t branchingFactor, size_t defaultThreshold, size_t numPrimitives, size_t bytesEstimated)
- {
- if (numPrimitives == 0 || bytesEstimated == 0)
- return defaultThreshold;
-
- /* calculate block size in bytes to fulfill threadLocalAllocOverhead constraint */
- const size_t single_mode_factor = use_single_mode ? 1 : 2;
- const size_t threadCount = TaskScheduler::threadCount();
- const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSize;
-
- /* if we do not have to limit number of threads use optimal thresdhold */
- if ( (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount)
- return defaultThreshold;
-
- /* otherwise limit number of threads by calculating proper single thread threshold */
- else {
- double bytesPerPrimitive = double(bytesEstimated)/double(numPrimitives);
- return size_t(ceil(branchingFactor*singleThreadBytes/bytesPerPrimitive));
- }
- }
-
- __forceinline size_t alignSize(size_t i) {
- return (i+127)/128*128;
- }
-
- /*! initializes the grow size */
- __forceinline void initGrowSizeAndNumSlots(size_t bytesEstimated, bool fast)
- {
- /* we do not need single thread local allocator mode */
- use_single_mode = false;
-
- /* calculate growSize such that at most mainAllocationOverhead gets wasted when a block stays unused */
- size_t mainAllocOverhead = fast ? mainAllocOverheadDynamic : mainAllocOverheadStatic;
- size_t blockSize = alignSize(bytesEstimated/mainAllocOverhead);
- growSize = maxGrowSize = clamp(blockSize,size_t(1024),maxAllocationSize);
-
- /* if we reached the maxAllocationSize for growSize, we can
- * increase the number of allocation slots by still guaranteeing
- * the mainAllocationOverhead */
- slotMask = 0x0;
-
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 2 && bytesEstimated > 2*mainAllocOverhead*growSize) slotMask = 0x1;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 4 && bytesEstimated > 4*mainAllocOverhead*growSize) slotMask = 0x3;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 8*mainAllocOverhead*growSize) slotMask = 0x7;
- if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 16*mainAllocOverhead*growSize) { growSize *= 2; } /* if the overhead is tiny, double the growSize */
-
- /* set the thread local alloc block size */
- size_t defaultBlockSizeSwitch = PAGE_SIZE+maxAlignment;
-
- /* for sufficiently large scene we can increase the defaultBlockSize over the defaultBlockSizeSwitch size */
-#if 0 // we do not do this as a block size of 4160 if for some reason best for KNL
- const size_t threadCount = TaskScheduler::threadCount();
- const size_t single_mode_factor = use_single_mode ? 1 : 2;
- const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSizeSwitch;
- if (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount)
- defaultBlockSize = min(max(defaultBlockSizeSwitch,bytesEstimated/(single_mode_factor*threadLocalAllocOverhead*threadCount)),growSize);
-
- /* otherwise we grow the defaultBlockSize up to defaultBlockSizeSwitch */
- else
-#endif
- defaultBlockSize = clamp(blockSize,size_t(1024),defaultBlockSizeSwitch);
-
- if (bytesEstimated == 0) {
- maxGrowSize = maxAllocationSize; // special mode if builder cannot estimate tree size
- defaultBlockSize = defaultBlockSizeSwitch;
- }
- log2_grow_size_scale = 0;
-
- if (device->alloc_main_block_size != 0) growSize = device->alloc_main_block_size;
- if (device->alloc_num_main_slots >= 1 ) slotMask = 0x0;
- if (device->alloc_num_main_slots >= 2 ) slotMask = 0x1;
- if (device->alloc_num_main_slots >= 4 ) slotMask = 0x3;
- if (device->alloc_num_main_slots >= 8 ) slotMask = 0x7;
- if (device->alloc_thread_block_size != 0) defaultBlockSize = device->alloc_thread_block_size;
- if (device->alloc_single_thread_alloc != -1) use_single_mode = device->alloc_single_thread_alloc;
- }
-
- /*! initializes the allocator */
- void init(size_t bytesAllocate, size_t bytesReserve, size_t bytesEstimate)
- {
- internal_fix_used_blocks();
- /* distribute the allocation to multiple thread block slots */
- slotMask = MAX_THREAD_USED_BLOCK_SLOTS-1; // FIXME: remove
- if (usedBlocks.load() || freeBlocks.load()) { reset(); return; }
- if (bytesReserve == 0) bytesReserve = bytesAllocate;
- freeBlocks = Block::create(device,bytesAllocate,bytesReserve,nullptr,atype);
- estimatedSize = bytesEstimate;
- initGrowSizeAndNumSlots(bytesEstimate,true);
- }
-
- /*! initializes the allocator */
- void init_estimate(size_t bytesEstimate)
- {
- internal_fix_used_blocks();
- if (usedBlocks.load() || freeBlocks.load()) { reset(); return; }
- /* single allocator mode ? */
- estimatedSize = bytesEstimate;
- //initGrowSizeAndNumSlots(bytesEstimate,false);
- initGrowSizeAndNumSlots(bytesEstimate,false);
-
- }
-
- /*! frees state not required after build */
- __forceinline void cleanup()
- {
- internal_fix_used_blocks();
-
- /* unbind all thread local allocators */
- for (auto alloc : thread_local_allocators) alloc->unbind(this);
- thread_local_allocators.clear();
- }
-
- /*! resets the allocator, memory blocks get reused */
- void reset ()
- {
- internal_fix_used_blocks();
-
- bytesUsed.store(0);
- bytesFree.store(0);
- bytesWasted.store(0);
-
- /* reset all used blocks and move them to begin of free block list */
- while (usedBlocks.load() != nullptr) {
- usedBlocks.load()->reset_block();
- Block* nextUsedBlock = usedBlocks.load()->next;
- usedBlocks.load()->next = freeBlocks.load();
- freeBlocks = usedBlocks.load();
- usedBlocks = nextUsedBlock;
- }
-
- /* remove all shared blocks as they are re-added during build */
- freeBlocks.store(Block::remove_shared_blocks(freeBlocks.load()));
-
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
- {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- }
-
- /* unbind all thread local allocators */
- for (auto alloc : thread_local_allocators) alloc->unbind(this);
- thread_local_allocators.clear();
- }
-
- /*! frees all allocated memory */
- __forceinline void clear()
- {
- cleanup();
- bytesUsed.store(0);
- bytesFree.store(0);
- bytesWasted.store(0);
- if (usedBlocks.load() != nullptr) usedBlocks.load()->clear_list(device); usedBlocks = nullptr;
- if (freeBlocks.load() != nullptr) freeBlocks.load()->clear_list(device); freeBlocks = nullptr;
- for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) {
- threadUsedBlocks[i] = nullptr;
- threadBlocks[i] = nullptr;
- }
- primrefarray.clear();
- }
-
- __forceinline size_t incGrowSizeScale()
- {
- size_t scale = log2_grow_size_scale.fetch_add(1)+1;
- return size_t(1) << min(size_t(16),scale);
- }
-
- /*! thread safe allocation of memory */
- void* malloc(size_t& bytes, size_t align, bool partial)
- {
- assert(align <= maxAlignment);
-
- while (true)
- {
- /* allocate using current block */
- size_t threadID = TaskScheduler::threadID();
- size_t slot = threadID & slotMask;
- Block* myUsedBlocks = threadUsedBlocks[slot];
- if (myUsedBlocks) {
- void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
- if (ptr) return ptr;
- }
-
- /* throw error if allocation is too large */
- if (bytes > maxAllocationSize)
- throw_RTCError(RTC_ERROR_UNKNOWN,"allocation is too large");
-
- /* parallel block creation in case of no freeBlocks, avoids single global mutex */
- if (likely(freeBlocks.load() == nullptr))
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(slotMutex[slot]);
-#else
- Lock<SpinLock> lock(slotMutex[slot]);
-#endif
- if (myUsedBlocks == threadUsedBlocks[slot]) {
- const size_t alignedBytes = (bytes+(align-1)) & ~(align-1);
- const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes);
- assert(allocSize >= bytes);
- threadBlocks[slot] = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,threadBlocks[slot],atype); // FIXME: a large allocation might throw away a block here!
- // FIXME: a direct allocation should allocate inside the block here, and not in the next loop! a different thread could do some allocation and make the large allocation fail.
- }
- continue;
- }
-
- /* if this fails allocate new block */
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(mutex);
-#else
- Lock<SpinLock> lock(mutex);
-#endif
- if (myUsedBlocks == threadUsedBlocks[slot])
- {
- if (freeBlocks.load() != nullptr) {
- Block* nextFreeBlock = freeBlocks.load()->next;
- freeBlocks.load()->next = usedBlocks;
- __memory_barrier();
- usedBlocks = freeBlocks.load();
- threadUsedBlocks[slot] = freeBlocks.load();
- freeBlocks = nextFreeBlock;
- } else {
- const size_t allocSize = min(growSize*incGrowSizeScale(),maxGrowSize);
- usedBlocks = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,usedBlocks,atype); // FIXME: a large allocation should get delivered directly, like above!
- }
- }
- }
- }
- }
-
- /*! Registers an externally provided memory range as a SHARED block at the
-  *  head of the free list. The start address is rounded up to maxAlignment;
-  *  ranges with fewer than 4096 bytes left after alignment are ignored. */
- void addBlock(void* ptr, ssize_t bytes)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
-   std::scoped_lock lock(mutex);
-#else
-   Lock<SpinLock> lock(mutex);
-#endif
-   /* round the start address up to the next maxAlignment boundary */
-   const size_t rawAddr = (size_t)ptr;
-   const size_t alignedAddr = (rawAddr+maxAlignment-1) & ~(maxAlignment-1);
-   const size_t padding = alignedAddr - rawAddr;
-
-   /* the padding is lost to the caller's range; drop empty or tiny ranges */
-   bytes -= padding;
-   if (bytes < 4096) return;
-
-   /* the block header lives inside the range, the rest is usable payload */
-   const size_t headerBytes = offsetof(Block,data[0]);
-   const size_t payloadBytes = bytes-headerBytes;
-   freeBlocks = new ((void*)alignedAddr) Block(SHARED,payloadBytes,payloadBytes,freeBlocks,padding);
- }
-
- /* Returns the current position inside the first free block without
-  * advancing it. Per the original note this is only used by the morton
-  * builder, a single time for each build. */
- void* specialAlloc(size_t bytes)
- {
-   Block* const head = freeBlocks.load();
-   assert(head != nullptr && head->getBlockAllocatedBytes() >= bytes);
-   return head->ptr();
- }
-
- /*! Used/free/wasted byte counters gathered from the allocator's block lists. */
- struct Statistics
- {
-   /*! all counters start at zero */
-   Statistics ()
-     : bytesUsed(0), bytesFree(0), bytesWasted(0) {}
-
-   /*! initializes the counters from explicit values */
-   Statistics (size_t bytesUsed, size_t bytesFree, size_t bytesWasted)
-     : bytesUsed(bytesUsed), bytesFree(bytesFree), bytesWasted(bytesWasted) {}
-
-   /*! sums up the blocks of the requested type found in the used and free lists of alloc */
-   Statistics (FastAllocator* alloc, AllocationType atype, bool huge_pages = false)
-     : bytesUsed(0), bytesFree(0), bytesWasted(0)
-   {
-     Block* const usedHead = alloc->usedBlocks.load();
-     Block* const freeHead = alloc->freeBlocks.load();
-
-     /* everything allocated in a free block counts as free */
-     if (freeHead) {
-       bytesFree += freeHead->getAllocatedBytes(atype,huge_pages);
-       bytesWasted += freeHead->getWastedBytes(atype,huge_pages);
-     }
-     if (usedHead) {
-       bytesUsed += usedHead->getUsedBytes(atype,huge_pages);
-       bytesFree += usedHead->getFreeBytes(atype,huge_pages);
-       bytesWasted += usedHead->getWastedBytes(atype,huge_pages);
-     }
-   }
-
-   /*! formats the counters in MB plus the total bytes per primitive */
-   std::string str(size_t numPrimitives)
-   {
-     const size_t bytesTotal = bytesAllocatedTotal();
-     std::stringstream out;
-     out.setf(std::ios::fixed, std::ios::floatfield);
-     out << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, ";
-     out << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, ";
-     out << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, ";
-     out << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesTotal << " MB, ";
-     out << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesTotal)/double(numPrimitives);
-     return out.str();
-   }
-
-   /*! component-wise sum of two statistics */
-   friend Statistics operator+ ( const Statistics& a, const Statistics& b)
-   {
-     Statistics sum(a);
-     sum.bytesUsed += b.bytesUsed;
-     sum.bytesFree += b.bytesFree;
-     sum.bytesWasted += b.bytesWasted;
-     return sum;
-   }
-
-   /*! used + free + wasted */
-   size_t bytesAllocatedTotal() const {
-     return bytesUsed + bytesFree + bytesWasted;
-   }
-
- public:
-   size_t bytesUsed;
-   size_t bytesFree;
-   size_t bytesWasted;
- };
-
- /*! gathers block statistics of this allocator for the requested allocation type */
- Statistics getStatistics(AllocationType atype, bool huge_pages = false) {
-   Statistics stats(this,atype,huge_pages);
-   return stats;
- }
-
- /*! current value of the allocator's bytesUsed counter */
- size_t getUsedBytes() {
-   return bytesUsed.load();
- }
-
- /*! current value of the allocator's bytesWasted counter */
- size_t getWastedBytes() {
-   return bytesWasted.load();
- }
-
- /*! Snapshot of the allocator's global byte counters together with block
-  *  statistics for each allocation type. */
- struct AllStatistics
- {
-   /*! captures the counters of alloc and gathers one Statistics per allocation type */
-   AllStatistics (FastAllocator* alloc)
-     : bytesUsed(alloc->bytesUsed),
-       bytesFree(alloc->bytesFree),
-       bytesWasted(alloc->bytesWasted),
-       stat_all(alloc,ANY_TYPE),
-       stat_malloc(alloc,ALIGNED_MALLOC),
-       stat_4K(alloc,EMBREE_OS_MALLOC,false),
-       stat_2M(alloc,EMBREE_OS_MALLOC,true),
-       stat_shared(alloc,SHARED) {}
-
-   /*! builds the snapshot from explicit values */
-   AllStatistics (size_t bytesUsed,
-                  size_t bytesFree,
-                  size_t bytesWasted,
-                  Statistics stat_all,
-                  Statistics stat_malloc,
-                  Statistics stat_4K,
-                  Statistics stat_2M,
-                  Statistics stat_shared)
-     : bytesUsed(bytesUsed),
-       bytesFree(bytesFree),
-       bytesWasted(bytesWasted),
-       stat_all(stat_all),
-       stat_malloc(stat_malloc),
-       stat_4K(stat_4K),
-       stat_2M(stat_2M),
-       stat_shared(stat_shared) {}
-
-   /*! member-wise sum of two snapshots */
-   friend AllStatistics operator+ (const AllStatistics& a, const AllStatistics& b)
-   {
-     AllStatistics sum(a);
-     sum.bytesUsed += b.bytesUsed;
-     sum.bytesFree += b.bytesFree;
-     sum.bytesWasted += b.bytesWasted;
-     sum.stat_all = sum.stat_all + b.stat_all;
-     sum.stat_malloc = sum.stat_malloc + b.stat_malloc;
-     sum.stat_4K = sum.stat_4K + b.stat_4K;
-     sum.stat_2M = sum.stat_2M + b.stat_2M;
-     sum.stat_shared = sum.stat_shared + b.stat_shared;
-     return sum;
-   }
-
-   /*! writes the global counters and the per-type statistics to std::cout */
-   void print(size_t numPrimitives)
-   {
-     const size_t bytesTotal = bytesUsed+bytesFree+bytesWasted;
-
-     /* first line: used bytes only */
-     std::stringstream usedLine;
-     usedLine.setf(std::ios::fixed, std::ios::floatfield);
-     usedLine << " alloc : ";
-     usedLine << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, ";
-     usedLine << " ";
-     usedLine << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesUsed)/double(numPrimitives);
-     std::cout << usedLine.str() << std::endl;
-
-     /* second line: full used/free/wasted/total breakdown */
-     std::stringstream fullLine;
-     fullLine.setf(std::ios::fixed, std::ios::floatfield);
-     fullLine << " alloc : ";
-     fullLine << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, ";
-     fullLine << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, ";
-     fullLine << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, ";
-     fullLine << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesTotal << " MB, ";
-     fullLine << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesTotal)/double(numPrimitives);
-     std::cout << fullLine.str() << std::endl;
-
-     /* per allocation type */
-     std::cout << " total : " << stat_all.str(numPrimitives) << std::endl;
-     std::cout << " 4K : " << stat_4K.str(numPrimitives) << std::endl;
-     std::cout << " 2M : " << stat_2M.str(numPrimitives) << std::endl;
-     std::cout << " malloc: " << stat_malloc.str(numPrimitives) << std::endl;
-     std::cout << " shared: " << stat_shared.str(numPrimitives) << std::endl;
-   }
-
- private:
-   size_t bytesUsed;
-   size_t bytesFree;
-   size_t bytesWasted;
-   Statistics stat_all;
-   Statistics stat_malloc;
-   Statistics stat_4K;
-   Statistics stat_2M;
-   Statistics stat_shared;
- };
-
- /*! dumps the allocator configuration followed by the used and free block lists to std::cout */
- void print_blocks()
- {
-   std::cout << " estimatedSize = " << estimatedSize;
-   std::cout << ", slotMask = " << slotMask;
-   std::cout << ", use_single_mode = " << use_single_mode;
-   std::cout << ", maxGrowSize = " << maxGrowSize;
-   std::cout << ", defaultBlockSize = " << defaultBlockSize << std::endl;
-
-   std::cout << " used blocks = ";
-   if (Block* usedHead = usedBlocks.load())
-     usedHead->print_list();
-   std::cout << "[END]" << std::endl;
-
-   std::cout << " free blocks = ";
-   if (Block* freeHead = freeBlocks.load())
-     freeHead->print_list();
-   std::cout << "[END]" << std::endl;
- }
-
- private:
-
- /*! Header of one contiguous memory block. Blocks are chained into singly
- * linked lists through `next`; allocations are served from `data` by
- * atomically advancing the `cur` offset. */
- struct Block
- {
- /*! Creates a block with room for bytesAllocate bytes (bytesReserve bytes
- * reserved) and links it in front of `next`. When device is non-null the
- * allocation is reported to it through memoryMonitor. Returns NULL only
- * when atype is neither ALIGNED_MALLOC nor EMBREE_OS_MALLOC (after the
- * assert fires in debug builds). */
- static Block* create(MemoryMonitorInterface* device, size_t bytesAllocate, size_t bytesReserve, Block* next, AllocationType atype)
- {
- /* We avoid using os_malloc for small blocks as this could
- * cause a risk of fragmenting the virtual address space and
- * reach the limit of vm.max_map_count = 65k under Linux. */
- if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize)
- atype = ALIGNED_MALLOC;
-
- /* we need to additionally allocate some header */
- const size_t sizeof_Header = offsetof(Block,data[0]);
- bytesAllocate = sizeof_Header+bytesAllocate;
- bytesReserve = sizeof_Header+bytesReserve;
-
- /* consume full 4k pages with using os_malloc */
- if (atype == EMBREE_OS_MALLOC) {
- bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1));
- bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1));
- }
-
- /* either use alignedMalloc or os_malloc */
- void *ptr = nullptr;
- if (atype == ALIGNED_MALLOC)
- {
- /* special handling for default block size */
- /* both branches allocate identically; this one additionally issues the os_advise hints */
- if (bytesAllocate == (2*PAGE_SIZE_2M))
- {
- const size_t alignment = maxAlignment;
- if (device) device->memoryMonitor(bytesAllocate+alignment,false);
- ptr = alignedMalloc(bytesAllocate,alignment);
-
- /* give hint to transparently convert these pages to 2MB pages */
- const size_t ptr_aligned_begin = ((size_t)ptr) & ~size_t(PAGE_SIZE_2M-1);
- os_advise((void*)(ptr_aligned_begin + 0),PAGE_SIZE_2M); // may fail if no memory mapped before block
- os_advise((void*)(ptr_aligned_begin + 1*PAGE_SIZE_2M),PAGE_SIZE_2M);
- os_advise((void*)(ptr_aligned_begin + 2*PAGE_SIZE_2M),PAGE_SIZE_2M); // may fail if no memory mapped after block
-
- /* the alignment is recorded as the block's `wasted` bytes */
- return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
- }
- else
- {
- const size_t alignment = maxAlignment;
- if (device) device->memoryMonitor(bytesAllocate+alignment,false);
- ptr = alignedMalloc(bytesAllocate,alignment);
- return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
- }
- }
- else if (atype == EMBREE_OS_MALLOC)
- {
- if (device) device->memoryMonitor(bytesAllocate,false);
- /* NOTE(review): huge_pages is passed uninitialized, presumably an output of os_malloc - confirm against its signature */
- bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages);
- return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
- }
- else
- assert(false);
-
- return NULL;
- }
-
- /*! Initializes the header of an empty block (cur = 0). bytesAllocate and
- * bytesReserve become allocEnd and reserveEnd; the assert checks that
- * `data` starts at a maxAlignment boundary. */
- Block (AllocationType atype, size_t bytesAllocate, size_t bytesReserve, Block* next, size_t wasted, bool huge_pages = false)
- : cur(0), allocEnd(bytesAllocate), reserveEnd(bytesReserve), next(next), wasted(wasted), atype(atype), huge_pages(huge_pages)
- {
- assert((((size_t)&data[0]) & (maxAlignment-1)) == 0);
- }
-
- /*! Unlinks every SHARED block from the list starting at head and returns
- * the new head. The unlinked blocks are not freed here. */
- static Block* remove_shared_blocks(Block* head)
- {
- /* prev_next points at the link that has to be patched when a block is dropped */
- Block** prev_next = &head;
- for (Block* block = head; block; block = block->next) {
- if (block->atype == SHARED) *prev_next = block->next;
- else prev_next = &block->next;
- }
- return head;
- }
-
- /*! Releases this block and all blocks following it in the list. */
- void clear_list(MemoryMonitorInterface* device)
- {
- Block* block = this;
- while (block) {
- /* fetch the successor first, clear_block may free the memory holding `block` */
- Block* next = block->next;
- block->clear_block(device);
- block = next;
- }
- }
-
- /*! Frees the memory of this block according to its allocation type and
- * reports the released bytes to device (if non-null). SHARED blocks are
- * left untouched. */
- void clear_block (MemoryMonitorInterface* device)
- {
- /* computed up front: the members are no longer accessible once the block is freed */
- const size_t sizeof_Header = offsetof(Block,data[0]);
- const ssize_t sizeof_Alloced = wasted+sizeof_Header+getBlockAllocatedBytes();
-
- if (atype == ALIGNED_MALLOC) {
- alignedFree(this);
- if (device) device->memoryMonitor(-sizeof_Alloced,true);
- }
-
- else if (atype == EMBREE_OS_MALLOC) {
- size_t sizeof_This = sizeof_Header+reserveEnd;
- os_free(this,sizeof_This,huge_pages);
- if (device) device->memoryMonitor(-sizeof_Alloced,true);
- }
-
- else /* if (atype == SHARED) */ {
- }
- }
-
- /*! Carves bytes_in bytes, rounded up to a multiple of align, out of this
- * block. Returns nullptr when the request does not fit. With partial set,
- * a request that only partly fits is truncated to the remaining space.
- * On success bytes_in receives the size actually granted. */
- void* malloc(MemoryMonitorInterface* device, size_t& bytes_in, size_t align, bool partial)
- {
- size_t bytes = bytes_in;
- assert(align <= maxAlignment);
- bytes = (bytes+(align-1)) & ~(align-1);
- /* cheap early-out before touching the atomic counter */
- if (unlikely(cur+bytes > reserveEnd && !partial)) return nullptr;
- /* the fetch_add is not undone on failure, so cur may end up beyond reserveEnd; the getters clamp it */
- const size_t i = cur.fetch_add(bytes);
- if (unlikely(i+bytes > reserveEnd && !partial)) return nullptr;
- if (unlikely(i > reserveEnd)) return nullptr;
- bytes_in = bytes = min(bytes,reserveEnd-i);
-
- /* report only the part of the allocation that extends past allocEnd */
- if (i+bytes > allocEnd) {
- if (device) device->memoryMonitor(i+bytes-max(i,allocEnd),true);
- }
- return &data[i];
- }
-
- /*! Address of the current allocation position; does not advance cur. */
- void* ptr() {
- return &data[cur];
- }
-
- /*! Rewinds the block to empty, raising allocEnd to the old position if that was larger. */
- void reset_block ()
- {
- allocEnd = max(allocEnd,(size_t)cur);
- cur = 0;
- }
-
- /*! Bytes handed out from this block, clamped to reserveEnd. */
- size_t getBlockUsedBytes() const {
- return min(size_t(cur),reserveEnd);
- }
-
- /*! Allocated bytes of this block that are not used. */
- size_t getBlockFreeBytes() const {
- return getBlockAllocatedBytes() - getBlockUsedBytes();
- }
-
- /*! Larger of allocEnd and cur, clamped to reserveEnd. */
- size_t getBlockAllocatedBytes() const {
- return min(max(allocEnd,size_t(cur)),reserveEnd);
- }
-
- /*! Header size plus the `wasted` bytes recorded at construction. */
- size_t getBlockWastedBytes() const {
- const size_t sizeof_Header = offsetof(Block,data[0]);
- return sizeof_Header + wasted;
- }
-
- /*! Reserved size of this block. */
- size_t getBlockReservedBytes() const {
- return reserveEnd;
- }
-
- /*! Tests whether this block matches the queried type. ANY_TYPE matches
- * every block; EMBREE_OS_MALLOC blocks must also match huge_pages_i. */
- bool hasType(AllocationType atype_i, bool huge_pages_i) const
- {
- if (atype_i == ANY_TYPE ) return true;
- else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
- else return atype_i == atype;
- }
-
- /*! Sum of getBlockUsedBytes() over all blocks of the given type, starting at this block. */
- size_t getUsedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockUsedBytes();
- }
- return bytes;
- }
-
- /*! Sum of getBlockFreeBytes() over all blocks of the given type, starting at this block. */
- size_t getFreeBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockFreeBytes();
- }
- return bytes;
- }
-
- /*! Sum of getBlockWastedBytes() over all blocks of the given type, starting at this block. */
- size_t getWastedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockWastedBytes();
- }
- return bytes;
- }
-
- /*! Sum of getBlockAllocatedBytes() over all blocks of the given type, starting at this block. */
- size_t getAllocatedBytes(AllocationType atype, bool huge_pages = false) const {
- size_t bytes = 0;
- for (const Block* block = this; block; block = block->next) {
- if (!block->hasType(atype,huge_pages)) continue;
- bytes += block->getBlockAllocatedBytes();
- }
- return bytes;
- }
-
- /*! Prints this block and all blocks following it in the list. */
- void print_list ()
- {
- for (const Block* block = this; block; block = block->next)
- block->print_block();
- }
-
- /*! Prints a type tag (A, O or S, plus H for huge pages) followed by [used, free, wasted] to std::cout. */
- void print_block() const
- {
- if (atype == ALIGNED_MALLOC) std::cout << "A";
- else if (atype == EMBREE_OS_MALLOC) std::cout << "O";
- else if (atype == SHARED) std::cout << "S";
- if (huge_pages) std::cout << "H";
- size_t bytesUsed = getBlockUsedBytes();
- size_t bytesFree = getBlockFreeBytes();
- size_t bytesWasted = getBlockWastedBytes();
- std::cout << "[" << bytesUsed << ", " << bytesFree << ", " << bytesWasted << "] ";
- }
-
- public:
- std::atomic<size_t> cur; //!< current location of the allocator
- std::atomic<size_t> allocEnd; //!< end of the allocated memory region
- std::atomic<size_t> reserveEnd; //!< end of the reserved memory region
- Block* next; //!< pointer to next block in list
- size_t wasted; //!< amount of memory wasted through block alignment
- AllocationType atype; //!< allocation mode of the block
- bool huge_pages; //!< whether the block uses huge pages
- char align[maxAlignment-5*sizeof(size_t)-sizeof(AllocationType)-sizeof(bool)]; //!< align data to maxAlignment
- char data[1]; //!< here starts memory to use for allocations
- };
-
- private:
- Device* device;
- SpinLock mutex;
- size_t slotMask;
- std::atomic<Block*> threadUsedBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
- std::atomic<Block*> usedBlocks;
- std::atomic<Block*> freeBlocks;
-
- std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
-#else
- SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
-#endif
-
- bool use_single_mode;
- size_t defaultBlockSize;
- size_t estimatedSize;
- size_t growSize;
- size_t maxGrowSize;
- std::atomic<size_t> log2_grow_size_scale; //!< log2 of scaling factor for grow size // FIXME: remove
- std::atomic<size_t> bytesUsed;
- std::atomic<size_t> bytesFree;
- std::atomic<size_t> bytesWasted;
- static __thread ThreadLocal2* thread_local_allocator2;
- static SpinLock s_thread_local_allocators_lock;
- static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators;
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::mutex thread_local_allocators_lock;
-#else
- SpinLock thread_local_allocators_lock;
-#endif
- std::vector<ThreadLocal2*> thread_local_allocators;
- AllocationType atype;
- mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/buffer.h b/thirdparty/embree-aarch64/kernels/common/buffer.h
deleted file mode 100644
index 02d319c59d..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/buffer.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-
-namespace embree
-{
- /*! Implements an API data buffer object. This class may or may not own the data. */
- class Buffer : public RefCount
- {
- public:
-   /*! Creates an empty buffer that is not attached to any device. */
-   Buffer()
-     : device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
-
-   /*! Creates a buffer of numBytes_in bytes. A non-null ptr_in is adopted as
-    *  memory shared with the application and is never freed here; otherwise
-    *  the storage is allocated through alloc(). Takes a reference on the
-    *  device when one is given. */
-   Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
-     : device(device), ptr(nullptr), numBytes(numBytes_in), shared(ptr_in != nullptr)
-   {
-     /* alloc() and free() tolerate a null device, so do the same here */
-     if (device) device->refInc();
-
-     if (shared) ptr = (char*)ptr_in;
-     else        alloc();
-   }
-
-   /*! Releases owned storage and drops the device reference. */
-   ~Buffer() {
-     free();
-     /* a default-constructed buffer has no device to release */
-     if (device) device->refDec();
-   }
-
-   /*! this class is not copyable */
- private:
-   Buffer(const Buffer& other) DELETED; // do not implement
-   Buffer& operator =(const Buffer& other) DELETED; // do not implement
-
- public:
-   /*! (re)initializes the buffer for numBytes_in bytes and allocates its storage */
-   void create(Device* device_in, size_t numBytes_in)
-   {
-     init(device_in, numBytes_in);
-     alloc();
-   }
-
-   /*! Releases the current storage and records the new device and size
-    *  without allocating.
-    *  NOTE(review): the device pointer is replaced without adjusting any
-    *  reference count - confirm that callers keep the counts balanced. */
-   void init(Device* device_in, size_t numBytes_in)
-   {
-     free();
-     device = device_in;
-     ptr = nullptr;
-     numBytes = numBytes_in;
-     shared = false;
-   }
-
-   /*! Releases the current storage and adopts ptr_in as shared memory.
-    *  Passing (size_t)-1 as numBytes_in keeps the previous size. */
-   void set(Device* device_in, void* ptr_in, size_t numBytes_in)
-   {
-     free();
-     device = device_in;
-     ptr = (char*)ptr_in;
-     if (numBytes_in != (size_t)-1)
-       numBytes = numBytes_in;
-     shared = true;
-   }
-
-   /*! Allocates 16 byte aligned storage for bytes() bytes, with the size
-    *  rounded up to a multiple of 16, and reports bytes() to the device. */
-   void alloc()
-   {
-     if (device)
-       device->memoryMonitor(this->bytes(), false);
-     size_t b = (this->bytes()+15) & ssize_t(-16);
-     ptr = (char*)alignedMalloc(b,16);
-   }
-
-   /*! Frees owned storage and reports the released bytes to the device.
-    *  Shared memory is never freed. */
-   void free()
-   {
-     if (shared) return;
-
-     /* nothing allocated (e.g. init() without alloc(), or a repeated free()):
-      * reporting a release here would corrupt the memory monitor's balance */
-     if (!ptr) return;
-
-     alignedFree(ptr);
-     if (device)
-       device->memoryMonitor(-ssize_t(this->bytes()), true);
-     ptr = nullptr;
-   }
-
-   /*! Returns the buffer pointer; raises RTC_ERROR_INVALID_ARGUMENT when the
-    *  buffer has no device. */
-   void* data()
-   {
-     /* report error if buffer is not existing */
-     if (!device)
-       throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
-
-     /* return buffer */
-     return ptr;
-   }
-
-   /*! returns pointer to first element */
-   __forceinline char* getPtr() const {
-     return ptr;
-   }
-
-   /*! returns the number of bytes of the buffer */
-   __forceinline size_t bytes() const {
-     return numBytes;
-   }
-
-   /*! returns true if the buffer has storage attached */
-   __forceinline operator bool() const {
-     return ptr != nullptr;
-   }
-
- public:
-   Device* device; //!< device to report memory usage to
-   char* ptr; //!< pointer to buffer data
-   size_t numBytes; //!< number of bytes in the buffer
-   bool shared; //!< set if memory is shared with application
- };
-
- /*! An untyped, strided window into a Buffer. The view keeps a reference to
-  *  the parent buffer but does not own the buffer content. */
- class RawBufferView
- {
- public:
-   /*! creates an empty view that counts as modified */
-   RawBufferView()
-     : ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
-
- public:
-   /*! Points the view at num_in elements of buffer_in, starting offset_in
-    *  bytes into the buffer and stride_in bytes apart. Bumps the modification
-    *  counter and sets the local modified flag.
-    *  NOTE(review): the range check compares against stride_in*numBytes, not
-    *  numBytes - confirm this is the intended limit. */
-   void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
-   {
-     const size_t rangeEnd = offset_in + stride_in * num_in;
-     const size_t rangeLimit = stride_in * buffer_in->numBytes;
-     if (rangeEnd > rangeLimit)
-       throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
-
-     /* layout of the view */
-     ptr_ofs = buffer_in->ptr + offset_in;
-     stride = stride_in;
-     num = num_in;
-     format = format_in;
-
-     /* a new layout counts as a modification */
-     modCounter++;
-     modified = true;
-
-     buffer = buffer_in;
-   }
-
-   /*! returns pointer to the first element */
-   __forceinline char* getPtr() const {
-     return ptr_ofs;
-   }
-
-   /*! returns pointer to the i'th element */
-   __forceinline char* getPtr(size_t i) const
-   {
-     assert(i<num);
-     const size_t byteOffset = i*stride;
-     return ptr_ofs + byteOffset;
-   }
-
-   /*! returns the number of elements of the view */
-   __forceinline size_t size() const {
-     return num;
-   }
-
-   /*! returns the number of bytes spanned by the view (num*stride) */
-   __forceinline size_t bytes() const {
-     return num*stride;
-   }
-
-   /*! returns the stride narrowed to unsigned */
-   __forceinline unsigned getStride() const
-   {
-     assert(stride <= unsigned(inf));
-     return unsigned(stride);
-   }
-
-   /*! returns the element format */
-   __forceinline RTCFormat getFormat() const {
-     return format;
-   }
-
-   /*! marks the view as modified: bumps the counter and sets the local flag */
-   __forceinline void setModified() {
-     modCounter++;
-     modified = true;
-   }
-
-   /*! true if the view's counter is ahead of otherModCounter */
-   __forceinline bool isModified(unsigned int otherModCounter) const {
-     return modCounter > otherModCounter;
-   }
-
-   /*! returns the local modified flag */
-   __forceinline bool isLocalModified() const {
-     return modified;
-   }
-
-   /*! clear local modified flag */
-   __forceinline void clearLocalModified() {
-     modified = false;
-   }
-
-   /*! returns true if the view points at some memory */
-   __forceinline operator bool() const {
-     return ptr_ofs;
-   }
-
-   /*! Reads the 4th int of the last element so that missing 16 byte padding
-    *  fails hard; does nothing for an empty view. */
-   __forceinline void checkPadding16() const
-   {
-     if (!ptr_ofs || !num) return;
-     const int* lastElement = (int*)getPtr(size()-1);
-     volatile int MAYBE_UNUSED w = *(lastElement+3); // FIXME: is failing hard avoidable?
-   }
-
- public:
-   char* ptr_ofs; //!< base pointer plus offset
-   size_t stride; //!< stride of the buffer in bytes
-   size_t num; //!< number of elements in the buffer
-   RTCFormat format; //!< format of the buffer
-   unsigned int modCounter; //!< version ID of this buffer
-   bool modified; //!< local modified data
-   int userData; //!< special data
-   Ref<Buffer> buffer; //!< reference to the parent buffer
- };
-
- /*! A typed, strided window into a buffer; elements are reinterpreted as T.
-  *  This class does not own the buffer content. */
- template<typename T>
- class BufferView : public RawBufferView
- {
- public:
-   using value_type = T;
-
-   /*! mutable access to the i'th element */
-   __forceinline T& operator [](size_t i)
-   {
-     assert(i<num);
-     char* const element = ptr_ofs + i*stride;
-     return *(T*)element;
-   }
-
-   /*! read-only access to the i'th element */
-   __forceinline const T& operator [](size_t i) const
-   {
-     assert(i<num);
-     char* const element = ptr_ofs + i*stride;
-     return *(T*)element;
-   }
- };
-
- /*! Vec3fa specialization: elements are moved with unaligned 4-float loads
-  *  and stores instead of being referenced in place. */
- template<>
- class BufferView<Vec3fa> : public RawBufferView
- {
- public:
-   using value_type = Vec3fa;
-
-   /*! loads the i'th element by value */
-   __forceinline const Vec3fa operator [](size_t i) const
-   {
-     assert(i<num);
-     const float* const src = (float*)(ptr_ofs + i*stride);
-     return Vec3fa(vfloat4::loadu(src));
-   }
-
-   /*! writes v into the i'th element */
-   __forceinline void store(size_t i, const Vec3fa& v)
-   {
-     assert(i<num);
-     float* const dst = (float*)(ptr_ofs + i*stride);
-     vfloat4::storeu(dst, (vfloat4)v);
-   }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/builder.h b/thirdparty/embree-aarch64/kernels/common/builder.h
deleted file mode 100644
index d2a1cfe3ce..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/builder.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "accel.h"
-
-namespace embree
-{
-#define MODE_HIGH_QUALITY (1<<8)
-
- /*! Abstract, reference-counted interface implemented by all hierarchy builders. */
- class Builder : public RefCount
- {
- public:
-
-   static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
-
-   /*! runs the hierarchy build */
-   virtual void build() = 0;
-
-   /*! hook called when geometry geomID gets deleted; the default does nothing */
-   virtual void deleteGeometry(size_t geomID) {}
-
-   /*! resets the internal state of the builder */
-   virtual void clear() = 0;
- };
-
- /*! Virtual interface for progress monitors. Implementations receive the
-  *  progress increment dn through the call operator. */
- struct BuildProgressMonitor {
-   /*! virtual so that implementations can safely be destroyed through this interface */
-   virtual ~BuildProgressMonitor() = default;
-
-   /*! reports a progress increment of dn */
-   virtual void operator() (size_t dn) const = 0;
- };
-
- /*! Adapts an arbitrary callable to the BuildProgressMonitor interface by
-  *  storing a copy of it and forwarding every progress increment. */
- template<typename Closure>
- struct ProgressMonitorClosure : BuildProgressMonitor
- {
- private:
-   const Closure closure;
-
- public:
-   ProgressMonitorClosure (const Closure& closure)
-     : closure(closure) {}
-
-   /*! forwards dn to the stored closure */
-   void operator() (size_t dn) const {
-     closure(dn);
-   }
- };
- /*! wraps closure into a ProgressMonitorClosure, deducing the closure type */
- template<typename Closure>
- __forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure)
- {
-   ProgressMonitorClosure<Closure> monitor(closure);
-   return monitor;
- }
-
- struct LineSegments;
- struct TriangleMesh;
- struct QuadMesh;
- struct UserGeometry;
-
- class Scene;
-
- /* factory signatures: each receives the scene plus a geometry and hands
-  * back an acceleration structure and its builder through the references */
- using createLineSegmentsAccelTy = void (*)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder);
- using createTriangleMeshAccelTy = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
- using createQuadMeshAccelTy     = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
- using createUserGeometryAccelTy = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/context.h b/thirdparty/embree-aarch64/kernels/common/context.h
deleted file mode 100644
index d0185a74f2..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/context.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "rtcore.h"
-#include "point_query.h"
-
-namespace embree
-{
- class Scene;
-
- /*! Pairs the scene with the user-supplied intersection context. */
- struct IntersectContext
- {
- public:
-   Scene* scene;
-   RTCIntersectContext* user;
-
- public:
-   __forceinline IntersectContext(Scene* scene, RTCIntersectContext* user_context)
-     : scene(scene), user(user_context) {}
-
-   /*! true when the user context carries a filter callback */
-   __forceinline bool hasContextFilter() const
-   {
-     const bool filterMissing = (user->filter == nullptr);
-     return !filterMissing;
-   }
-
-   /*! evaluates embree::isCoherent on the user context flags */
-   __forceinline bool isCoherent() const
-   {
-     const auto& flags = user->flags;
-     return embree::isCoherent(flags);
-   }
-
-   /*! evaluates embree::isIncoherent on the user context flags */
-   __forceinline bool isIncoherent() const
-   {
-     const auto& flags = user->flags;
-     return embree::isIncoherent(flags);
-   }
- };
-
- /*! SIMD variant. With RTC_MIN_WIDTH enabled, replaces the radius v.w by
-  *  minWidthDistanceFactor times the distance from ray_org to the point,
-  *  clamped to [v.w, maxRadiusScale*v.w]; otherwise returns v unchanged. */
- template<int M, typename Geometry>
- __forceinline Vec4vf<M> enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v)
- {
-#if RTC_MIN_WIDTH
-   const vfloat<M> dist = length(Vec3vf<M>(v) - ray_org);
-   const vfloat<M> radius = clamp(context->user->minWidthDistanceFactor*dist, v.w, geom->maxRadiusScale*v.w);
-   const Vec4vf<M> enlarged(v.x,v.y,v.z,radius);
-   return enlarged;
-#else
-   return v;
-#endif
- }
-
- /*! Scalar variant. With RTC_MIN_WIDTH enabled, replaces the radius v.w by
-  *  minWidthDistanceFactor times the distance from ray_org to the point,
-  *  clamped to [v.w, maxRadiusScale*v.w]; otherwise returns v unchanged. */
- template<typename Geometry>
- __forceinline Vec3ff enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v)
- {
-#if RTC_MIN_WIDTH
-   const float dist = length(Vec3fa(v) - ray_org);
-   const float radius = clamp(context->user->minWidthDistanceFactor*dist, v.w, geom->maxRadiusScale*v.w);
-   const Vec3ff enlarged(v.x,v.y,v.z,radius);
-   return enlarged;
-#else
-   return v;
-#endif
- }
-
- /*! Selects how a point query is represented. The names suggest a sphere
- * versus an axis-aligned bounding box - confirm against the point query
- * implementation. */
- enum PointQueryType
- {
- POINT_QUERY_TYPE_UNDEFINED = 0,
- POINT_QUERY_TYPE_SPHERE = 1,
- POINT_QUERY_TYPE_AABB = 2,
- };
-
- typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
-
- /*! State carried through a point query: the scene, the world space query,
-  *  the user callback with its context, and the per-axis query radius. */
- struct PointQueryContext
- {
- public:
-   /*! Stores the arguments, marks primID/geomID as invalid and seeds
-    *  query_radius from the world space radius. For AABB queries the radius
-    *  is recomputed right away through updateAABB(). */
-   __forceinline PointQueryContext(Scene* scene,
-                                   PointQuery* query_ws,
-                                   PointQueryType query_type,
-                                   PointQueryFunction func,
-                                   RTCPointQueryContext* userContext,
-                                   float similarityScale,
-                                   void* userPtr)
-     : scene(scene),
-       query_ws(query_ws),
-       query_type(query_type),
-       func(func),
-       userContext(userContext),
-       similarityScale(similarityScale),
-       userPtr(userPtr),
-       primID(RTC_INVALID_GEOMETRY_ID),
-       geomID(RTC_INVALID_GEOMETRY_ID),
-       query_radius(query_ws->radius)
-   {
-     if (query_type == POINT_QUERY_TYPE_AABB)
-     {
-       assert(similarityScale == 0.f);
-       updateAABB();
-     }
-
-     if (userContext->instStackSize == 0)
-     {
-       assert(similarityScale == 1.f);
-     }
-   }
-
- public:
-   /*! Recomputes query_radius. With an infinite radius or an empty instance
-    *  stack the world space radius is used for every axis; otherwise a box
-    *  of that radius is transformed by the top world2inst matrix and half of
-    *  its extent is stored. */
-   __forceinline void updateAABB()
-   {
-     const bool keepWorldRadius = query_ws->radius == (float)inf || userContext->instStackSize == 0;
-     if (likely(keepWorldRadius))
-     {
-       query_radius = Vec3fa(query_ws->radius);
-       return;
-     }
-
-     /* transform of the innermost instance on the stack */
-     const AffineSpace3fa world2inst = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
-     const BBox3fa worldBounds(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius));
-     const BBox3fa instBounds = xfmBounds(world2inst, worldBounds);
-     query_radius = 0.5f * (instBounds.upper - instBounds.lower);
-   }
-
- public:
-   Scene* scene;
-
-   PointQuery* query_ws; // the original world space point query
-   PointQueryType query_type;
-   PointQueryFunction func;
-   RTCPointQueryContext* userContext;
-   const float similarityScale;
-
-   void* userPtr;
-
-   unsigned int primID;
-   unsigned int geomID;
-
-   Vec3fa query_radius; // used if the query is converted to an AABB internally
- };
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/common/default.h b/thirdparty/embree-aarch64/kernels/common/default.h
deleted file mode 100644
index 709119163b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/default.h
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/sys/platform.h"
-#include "../../common/sys/sysinfo.h"
-#include "../../common/sys/thread.h"
-#include "../../common/sys/alloc.h"
-#include "../../common/sys/ref.h"
-#include "../../common/sys/intrinsics.h"
-#include "../../common/sys/atomic.h"
-#include "../../common/sys/mutex.h"
-#include "../../common/sys/vector.h"
-#include "../../common/sys/array.h"
-#include "../../common/sys/string.h"
-#include "../../common/sys/regression.h"
-#include "../../common/sys/vector.h"
-
-#include "../../common/math/math.h"
-#include "../../common/math/transcendental.h"
-#include "../../common/simd/simd.h"
-#include "../../common/math/vec2.h"
-#include "../../common/math/vec3.h"
-#include "../../common/math/vec4.h"
-#include "../../common/math/vec2fa.h"
-#include "../../common/math/vec3fa.h"
-#include "../../common/math/interval.h"
-#include "../../common/math/bbox.h"
-#include "../../common/math/obbox.h"
-#include "../../common/math/lbbox.h"
-#include "../../common/math/linearspace2.h"
-#include "../../common/math/linearspace3.h"
-#include "../../common/math/affinespace.h"
-#include "../../common/math/range.h"
-#include "../../common/lexers/tokenstream.h"
-
-#include "../../common/tasking/taskscheduler.h"
-
-#define COMMA ,
-
-#include "../config.h"
-#include "isa.h"
-#include "stat.h"
-#include "profile.h"
-#include "rtcore.h"
-#include "vector.h"
-#include "state.h"
-#include "instance_stack.h"
-
-#include <vector>
-#include <map>
-#include <algorithm>
-#include <functional>
-#include <utility>
-#include <sstream>
-
-#if !defined(_DEBUG) && defined(BUILD_IOS)
-#undef assert
-#define assert(_EXPR)
-#endif
-
-namespace embree
-{
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec2 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- /* aliases parameterized by the vector width N */
- template<int N> using Vec2vf = Vec2<vfloat<N>>;
- template<int N> using Vec2vd = Vec2<vdouble<N>>;
- template<int N> using Vec2vr = Vec2<vreal<N>>;
- template<int N> using Vec2vi = Vec2<vint<N>>;
- template<int N> using Vec2vl = Vec2<vllong<N>>;
- template<int N> using Vec2vb = Vec2<vbool<N>>;
- template<int N> using Vec2vbf = Vec2<vboolf<N>>;
- template<int N> using Vec2vbd = Vec2<vboold<N>>;
-
- /* shortcuts over the ...4 types */
- using Vec2vf4 = Vec2<vfloat4>;
- using Vec2vd4 = Vec2<vdouble4>;
- using Vec2vr4 = Vec2<vreal4>;
- using Vec2vi4 = Vec2<vint4>;
- using Vec2vl4 = Vec2<vllong4>;
- using Vec2vb4 = Vec2<vbool4>;
- using Vec2vbf4 = Vec2<vboolf4>;
- using Vec2vbd4 = Vec2<vboold4>;
-
- /* shortcuts over the ...8 types */
- using Vec2vf8 = Vec2<vfloat8>;
- using Vec2vd8 = Vec2<vdouble8>;
- using Vec2vr8 = Vec2<vreal8>;
- using Vec2vi8 = Vec2<vint8>;
- using Vec2vl8 = Vec2<vllong8>;
- using Vec2vb8 = Vec2<vbool8>;
- using Vec2vbf8 = Vec2<vboolf8>;
- using Vec2vbd8 = Vec2<vboold8>;
-
- /* shortcuts over the ...16 types */
- using Vec2vf16 = Vec2<vfloat16>;
- using Vec2vd16 = Vec2<vdouble16>;
- using Vec2vr16 = Vec2<vreal16>;
- using Vec2vi16 = Vec2<vint16>;
- using Vec2vl16 = Vec2<vllong16>;
- using Vec2vb16 = Vec2<vbool16>;
- using Vec2vbf16 = Vec2<vboolf16>;
- using Vec2vbd16 = Vec2<vboold16>;
-
- /* shortcuts over the ...x types */
- using Vec2vfx = Vec2<vfloatx>;
- using Vec2vdx = Vec2<vdoublex>;
- using Vec2vrx = Vec2<vrealx>;
- using Vec2vix = Vec2<vintx>;
- using Vec2vlx = Vec2<vllongx>;
- using Vec2vbx = Vec2<vboolx>;
- using Vec2vbfx = Vec2<vboolfx>;
- using Vec2vbdx = Vec2<vbooldx>;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec3 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- /* aliases parameterized by the vector width N */
- template<int N> using Vec3vf = Vec3<vfloat<N>>;
- template<int N> using Vec3vd = Vec3<vdouble<N>>;
- template<int N> using Vec3vr = Vec3<vreal<N>>;
- template<int N> using Vec3vi = Vec3<vint<N>>;
- template<int N> using Vec3vl = Vec3<vllong<N>>;
- template<int N> using Vec3vb = Vec3<vbool<N>>;
- template<int N> using Vec3vbf = Vec3<vboolf<N>>;
- template<int N> using Vec3vbd = Vec3<vboold<N>>;
-
- /* shortcuts over the ...4 types */
- using Vec3vf4 = Vec3<vfloat4>;
- using Vec3vd4 = Vec3<vdouble4>;
- using Vec3vr4 = Vec3<vreal4>;
- using Vec3vi4 = Vec3<vint4>;
- using Vec3vl4 = Vec3<vllong4>;
- using Vec3vb4 = Vec3<vbool4>;
- using Vec3vbf4 = Vec3<vboolf4>;
- using Vec3vbd4 = Vec3<vboold4>;
-
- /* shortcuts over the ...8 types */
- using Vec3vf8 = Vec3<vfloat8>;
- using Vec3vd8 = Vec3<vdouble8>;
- using Vec3vr8 = Vec3<vreal8>;
- using Vec3vi8 = Vec3<vint8>;
- using Vec3vl8 = Vec3<vllong8>;
- using Vec3vb8 = Vec3<vbool8>;
- using Vec3vbf8 = Vec3<vboolf8>;
- using Vec3vbd8 = Vec3<vboold8>;
-
- /* shortcuts over the ...16 types */
- using Vec3vf16 = Vec3<vfloat16>;
- using Vec3vd16 = Vec3<vdouble16>;
- using Vec3vr16 = Vec3<vreal16>;
- using Vec3vi16 = Vec3<vint16>;
- using Vec3vl16 = Vec3<vllong16>;
- using Vec3vb16 = Vec3<vbool16>;
- using Vec3vbf16 = Vec3<vboolf16>;
- using Vec3vbd16 = Vec3<vboold16>;
-
- /* shortcuts over the ...x types */
- using Vec3vfx = Vec3<vfloatx>;
- using Vec3vdx = Vec3<vdoublex>;
- using Vec3vrx = Vec3<vrealx>;
- using Vec3vix = Vec3<vintx>;
- using Vec3vlx = Vec3<vllongx>;
- using Vec3vbx = Vec3<vboolx>;
- using Vec3vbfx = Vec3<vboolfx>;
- using Vec3vbdx = Vec3<vbooldx>;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Vec4 shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using Vec4vf = Vec4<vfloat<N>>;
- template<int N> using Vec4vd = Vec4<vdouble<N>>;
- template<int N> using Vec4vr = Vec4<vreal<N>>;
- template<int N> using Vec4vi = Vec4<vint<N>>;
- template<int N> using Vec4vl = Vec4<vllong<N>>;
- template<int N> using Vec4vb = Vec4<vbool<N>>;
- template<int N> using Vec4vbf = Vec4<vboolf<N>>;
- template<int N> using Vec4vbd = Vec4<vboold<N>>;
-
- typedef Vec4<vfloat4> Vec4vf4;
- typedef Vec4<vdouble4> Vec4vd4;
- typedef Vec4<vreal4> Vec4vr4;
- typedef Vec4<vint4> Vec4vi4;
- typedef Vec4<vllong4> Vec4vl4;
- typedef Vec4<vbool4> Vec4vb4;
- typedef Vec4<vboolf4> Vec4vbf4;
- typedef Vec4<vboold4> Vec4vbd4;
-
- typedef Vec4<vfloat8> Vec4vf8;
- typedef Vec4<vdouble8> Vec4vd8;
- typedef Vec4<vreal8> Vec4vr8;
- typedef Vec4<vint8> Vec4vi8;
- typedef Vec4<vllong8> Vec4vl8;
- typedef Vec4<vbool8> Vec4vb8;
- typedef Vec4<vboolf8> Vec4vbf8;
- typedef Vec4<vboold8> Vec4vbd8;
-
- typedef Vec4<vfloat16> Vec4vf16;
- typedef Vec4<vdouble16> Vec4vd16;
- typedef Vec4<vreal16> Vec4vr16;
- typedef Vec4<vint16> Vec4vi16;
- typedef Vec4<vllong16> Vec4vl16;
- typedef Vec4<vbool16> Vec4vb16;
- typedef Vec4<vboolf16> Vec4vbf16;
- typedef Vec4<vboold16> Vec4vbd16;
-
- typedef Vec4<vfloatx> Vec4vfx;
- typedef Vec4<vdoublex> Vec4vdx;
- typedef Vec4<vrealx> Vec4vrx;
- typedef Vec4<vintx> Vec4vix;
- typedef Vec4<vllongx> Vec4vlx;
- typedef Vec4<vboolx> Vec4vbx;
- typedef Vec4<vboolfx> Vec4vbfx;
- typedef Vec4<vbooldx> Vec4vbdx;
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Other shortcuts
- ////////////////////////////////////////////////////////////////////////////////
-
- template<int N> using BBox3vf = BBox<Vec3vf<N>>;
- typedef BBox<Vec3vf4> BBox3vf4;
- typedef BBox<Vec3vf8> BBox3vf8;
- typedef BBox<Vec3vf16> BBox3vf16;
-
- /* calculate time segment itime and fractional time ftime */
- __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
- {
- const float timeScaled = time * numTimeSegments;
- const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return int(itimef);
- }
-
- __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
- {
- const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
- const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return int(itimef);
- }
-
- template<int N>
- __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
- {
- const vfloat<N> timeScaled = time * numTimeSegments;
- const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return vint<N>(itimef);
- }
-
- template<int N>
- __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
- {
- const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
- const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
- ftime = timeScaled - itimef;
- return vint<N>(itimef);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
- {
- const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
- const float round_down = 1.0f-2.0f*float(ulp);
- const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f);
- const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
- return make_range(itime_lower, itime_upper);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
- {
- const float lower = (range.lower-time_range.lower)/time_range.size();
- const float upper = (range.upper-time_range.lower)/time_range.size();
- return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/device.cpp b/thirdparty/embree-aarch64/kernels/common/device.cpp
deleted file mode 100644
index 16ec11b892..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/device.cpp
+++ /dev/null
@@ -1,567 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "device.h"
-#include "../hash.h"
-#include "scene_triangle_mesh.h"
-#include "scene_user_geometry.h"
-#include "scene_instance.h"
-#include "scene_curves.h"
-#include "scene_subdiv_mesh.h"
-
-#include "../subdiv/tessellation_cache.h"
-
-#include "acceln.h"
-#include "geometry.h"
-
-#include "../geometry/cylinder.h"
-
-#include "../bvh/bvh4_factory.h"
-#include "../bvh/bvh8_factory.h"
-
-#include "../../common/tasking/taskscheduler.h"
-#include "../../common/sys/alloc.h"
-
-namespace embree
-{
- /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
- ssize_t Device::debug_int0 = 0;
- ssize_t Device::debug_int1 = 0;
- ssize_t Device::debug_int2 = 0;
- ssize_t Device::debug_int3 = 0;
-
- DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
-
- static MutexSys g_mutex;
- static std::map<Device*,size_t> g_cache_size_map;
- static std::map<Device*,size_t> g_num_threads_map;
-
- Device::Device (const char* cfg)
- {
- /* check that CPU supports lowest ISA */
- if (!hasISA(ISA)) {
- throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
- }
-
- /* set default frequency level for detected CPU */
- switch (getCPUModel()) {
- case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
- case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
- case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
- }
-
- /* initialize global state */
-#if defined(EMBREE_CONFIG)
- State::parseString(EMBREE_CONFIG);
-#endif
- State::parseString(cfg);
- if (!ignore_config_files && FileName::executableFolder() != FileName(""))
- State::parseFile(FileName::executableFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR)));
- if (!ignore_config_files && FileName::homeFolder() != FileName(""))
- State::parseFile(FileName::homeFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR)));
- State::verify();
-
- /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
- if (!checkISASupport()) {
- throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
- }
-
- /*! do some internal tests */
- assert(isa::Cylinder::verify());
-
- /*! enable huge page support if desired */
-#if defined(__WIN32__)
- if (State::enable_selockmemoryprivilege)
- State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
-#endif
- State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
-
- /*! set tessellation cache size */
- setCacheSize( State::tessellation_cache_size );
-
- /*! enable some floating point exceptions to catch bugs */
- if (State::float_exceptions)
- {
- int exceptions = _MM_MASK_MASK;
- //exceptions &= ~_MM_MASK_INVALID;
- exceptions &= ~_MM_MASK_DENORM;
- exceptions &= ~_MM_MASK_DIV_ZERO;
- //exceptions &= ~_MM_MASK_OVERFLOW;
- //exceptions &= ~_MM_MASK_UNDERFLOW;
- //exceptions &= ~_MM_MASK_INEXACT;
- _MM_SET_EXCEPTION_MASK(exceptions);
- }
-
- /* print info header */
- if (State::verbosity(1))
- print();
- if (State::verbosity(2))
- State::print();
-
- /* register all algorithms */
- bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
-
-#if defined(EMBREE_TARGET_SIMD8)
- bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
-#endif
-
- /* setup tasking system */
- initTaskingSystem(numThreads);
-
- /* ray stream SOA to AOS conversion */
-#if defined(EMBREE_RAY_PACKETS)
- RayStreamFilterFuncsType rayStreamFilterFuncs;
- SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(enabled_cpu_features,rayStreamFilterFuncs);
- rayStreamFilters = rayStreamFilterFuncs();
-#endif
- }
-
- Device::~Device ()
- {
- setCacheSize(0);
- exitTaskingSystem();
- }
-
- std::string getEnabledTargets()
- {
- std::string v;
-#if defined(EMBREE_TARGET_SSE2)
- v += "SSE2 ";
-#endif
-#if defined(EMBREE_TARGET_SSE42)
- v += "SSE4.2 ";
-#endif
-#if defined(EMBREE_TARGET_AVX)
- v += "AVX ";
-#endif
-#if defined(EMBREE_TARGET_AVX2)
- v += "AVX2 ";
-#endif
-#if defined(EMBREE_TARGET_AVX512KNL)
- v += "AVX512KNL ";
-#endif
-#if defined(EMBREE_TARGET_AVX512SKX)
- v += "AVX512SKX ";
-#endif
- return v;
- }
-
- std::string getEmbreeFeatures()
- {
- std::string v;
-#if defined(EMBREE_RAY_MASK)
- v += "raymasks ";
-#endif
-#if defined (EMBREE_BACKFACE_CULLING)
- v += "backfaceculling ";
-#endif
-#if defined (EMBREE_BACKFACE_CULLING_CURVES)
- v += "backfacecullingcurves ";
-#endif
-#if defined(EMBREE_FILTER_FUNCTION)
- v += "intersection_filter ";
-#endif
-#if defined (EMBREE_COMPACT_POLYS)
- v += "compact_polys ";
-#endif
- return v;
- }
-
- void Device::print()
- {
- const int cpu_features = getCPUFeatures();
- std::cout << std::endl;
- std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
- std::cout << " Compiler : " << getCompilerName() << std::endl;
- std::cout << " Build : ";
-#if defined(DEBUG)
- std::cout << "Debug " << std::endl;
-#else
- std::cout << "Release " << std::endl;
-#endif
- std::cout << " Platform : " << getPlatformName() << std::endl;
- std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
- std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
- std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
- std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
- const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
- const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
- std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
- std::cout << " Config" << std::endl;
- std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
- std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
- std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
- std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
- std::cout << " Features: " << getEmbreeFeatures() << std::endl;
- std::cout << " Tasking : ";
-#if defined(TASKING_TBB)
- std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
- #if TBB_INTERFACE_VERSION >= 12002
- std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
- #else
- std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
- #endif
-#endif
-#if defined(TASKING_INTERNAL)
- std::cout << "internal_tasking_system ";
-#endif
-#if defined(TASKING_GCD) && defined(BUILD_IOS)
- std::cout << "GCD tasking system ";
-#endif
-#if defined(TASKING_PPL)
- std::cout << "PPL ";
-#endif
- std::cout << std::endl;
-
- /* check of FTZ and DAZ flags are set in CSR */
- if (!hasFTZ || !hasDAZ)
- {
-#if !defined(_DEBUG)
- if (State::verbosity(1))
-#endif
- {
- std::cout << std::endl;
- std::cout << "================================================================================" << std::endl;
- std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
- << " in the MXCSR control and status register. This can have a severe " << std::endl
- << " performance impact. Please enable these modes for each application " << std::endl
- << " thread the following way:" << std::endl
- << std::endl
- << " #include \"xmmintrin.h\"" << std::endl
- << " #include \"pmmintrin.h\"" << std::endl
- << std::endl
- << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
- << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
- std::cout << "================================================================================" << std::endl;
- std::cout << std::endl;
- }
- }
- std::cout << std::endl;
- }
-
- void Device::setDeviceErrorCode(RTCError error)
- {
- RTCError* stored_error = errorHandler.error();
- if (*stored_error == RTC_ERROR_NONE)
- *stored_error = error;
- }
-
- RTCError Device::getDeviceErrorCode()
- {
- RTCError* stored_error = errorHandler.error();
- RTCError error = *stored_error;
- *stored_error = RTC_ERROR_NONE;
- return error;
- }
-
- void Device::setThreadErrorCode(RTCError error)
- {
- RTCError* stored_error = g_errorHandler.error();
- if (*stored_error == RTC_ERROR_NONE)
- *stored_error = error;
- }
-
- RTCError Device::getThreadErrorCode()
- {
- RTCError* stored_error = g_errorHandler.error();
- RTCError error = *stored_error;
- *stored_error = RTC_ERROR_NONE;
- return error;
- }
-
- void Device::process_error(Device* device, RTCError error, const char* str)
- {
- /* store global error code when device construction failed */
- if (!device)
- return setThreadErrorCode(error);
-
- /* print error when in verbose mode */
- if (device->verbosity(1))
- {
- switch (error) {
- case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break;
- case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break;
- case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
- case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
- case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
- case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
- default : std::cerr << "Embree: Invalid error code"; break;
- };
- if (str) std::cerr << ", (" << str << ")";
- std::cerr << std::endl;
- }
-
- /* call user specified error callback */
- if (device->error_function)
- device->error_function(device->error_function_userptr,error,str);
-
- /* record error code */
- device->setDeviceErrorCode(error);
- }
-
- void Device::memoryMonitor(ssize_t bytes, bool post)
- {
- if (State::memory_monitor_function && bytes != 0) {
- if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {
- if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
- throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
- }
- }
- }
- }
-
- size_t getMaxNumThreads()
- {
- size_t maxNumThreads = 0;
- for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
- maxNumThreads = max(maxNumThreads, (*i).second);
- if (maxNumThreads == 0)
- maxNumThreads = std::numeric_limits<size_t>::max();
- return maxNumThreads;
- }
-
- size_t getMaxCacheSize()
- {
- size_t maxCacheSize = 0;
- for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
- maxCacheSize = max(maxCacheSize, (*i).second);
- return maxCacheSize;
- }
-
- void Device::setCacheSize(size_t bytes)
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- Lock<MutexSys> lock(g_mutex);
- if (bytes == 0) g_cache_size_map.erase(this);
- else g_cache_size_map[this] = bytes;
-
- size_t maxCacheSize = getMaxCacheSize();
- resizeTessellationCache(maxCacheSize);
-#endif
- }
-
- void Device::initTaskingSystem(size_t numThreads)
- {
- Lock<MutexSys> lock(g_mutex);
- if (numThreads == 0)
- g_num_threads_map[this] = std::numeric_limits<size_t>::max();
- else
- g_num_threads_map[this] = numThreads;
-
- /* create task scheduler */
- size_t maxNumThreads = getMaxNumThreads();
- TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
-#if USE_TASK_ARENA
- const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
- const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
- arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
-#endif
- }
-
- void Device::exitTaskingSystem()
- {
- Lock<MutexSys> lock(g_mutex);
- g_num_threads_map.erase(this);
-
- /* terminate tasking system */
- if (g_num_threads_map.size() == 0) {
- TaskScheduler::destroy();
- }
- /* or configure new number of threads */
- else {
- size_t maxNumThreads = getMaxNumThreads();
- TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
- }
-#if USE_TASK_ARENA
- arena.reset();
-#endif
- }
-
- void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
- {
- /* hidden internal properties */
- switch ((size_t)prop)
- {
- case 1000000: debug_int0 = val; return;
- case 1000001: debug_int1 = val; return;
- case 1000002: debug_int2 = val; return;
- case 1000003: debug_int3 = val; return;
- }
-
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
- }
-
- ssize_t Device::getProperty(const RTCDeviceProperty prop)
- {
- size_t iprop = (size_t)prop;
-
- /* get name of internal regression test */
- if (iprop >= 2000000 && iprop < 3000000)
- {
- RegressionTest* test = getRegressionTest(iprop-2000000);
- if (test) return (ssize_t) test->name.c_str();
- else return 0;
- }
-
- /* run internal regression test */
- if (iprop >= 3000000 && iprop < 4000000)
- {
- RegressionTest* test = getRegressionTest(iprop-3000000);
- if (test) return test->run();
- else return 0;
- }
-
- /* documented properties */
- switch (prop)
- {
- case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
- case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
- case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
- case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
-
-#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512KNL) | hasISA(AVX512SKX);
-#else
- case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_RAY_PACKETS)
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_RAY_MASK)
- case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_BACKFACE_CULLING)
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_BACKFACE_CULLING_CURVES)
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_COMPACT_POLYS)
- case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_IGNORE_INVALID_RAYS)
- case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
-#endif
-
-#if defined(TASKING_INTERNAL)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
-#endif
-
-#if defined(TASKING_TBB)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
-#endif
-
-#if defined(TASKING_PPL)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
-#endif
-
-#if defined(TASKING_GCD) && defined(BUILD_IOS)
- case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 3;
-#endif
-
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_QUAD)
- case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_CURVE)
- case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_USER)
- case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(EMBREE_GEOMETRY_POINT)
- case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
-#endif
-
-#if defined(TASKING_PPL)
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
-#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
-#else
- case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
-#endif
-
-#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
- case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
-#else
- case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
-#endif
-
- default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/device.h b/thirdparty/embree-aarch64/kernels/common/device.h
deleted file mode 100644
index e9a81bb109..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/device.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "state.h"
-#include "accel.h"
-
-namespace embree
-{
- class BVH4Factory;
- class BVH8Factory;
-
- class Device : public State, public MemoryMonitorInterface
- {
- ALIGNED_CLASS_(16);
-
- public:
-
- /*! Device construction */
- Device (const char* cfg);
-
- /*! Device destruction */
- virtual ~Device ();
-
- /*! prints info about the device */
- void print();
-
- /*! sets the error code */
- void setDeviceErrorCode(RTCError error);
-
- /*! returns and clears the error code */
- RTCError getDeviceErrorCode();
-
- /*! sets the error code */
- static void setThreadErrorCode(RTCError error);
-
- /*! returns and clears the error code */
- static RTCError getThreadErrorCode();
-
- /*! processes error codes, do not call directly */
- static void process_error(Device* device, RTCError error, const char* str);
-
- /*! invokes the memory monitor callback */
- void memoryMonitor(ssize_t bytes, bool post);
-
- /*! sets the size of the software cache. */
- void setCacheSize(size_t bytes);
-
- /*! sets a property */
- void setProperty(const RTCDeviceProperty prop, ssize_t val);
-
- /*! gets a property */
- ssize_t getProperty(const RTCDeviceProperty prop);
-
- private:
-
- /*! initializes the tasking system */
- void initTaskingSystem(size_t numThreads);
-
- /*! shuts down the tasking system */
- void exitTaskingSystem();
-
- /*! some variables that can be set via rtcSetParameter1i for debugging purposes */
- public:
- static ssize_t debug_int0;
- static ssize_t debug_int1;
- static ssize_t debug_int2;
- static ssize_t debug_int3;
-
- public:
- std::unique_ptr<BVH4Factory> bvh4_factory;
-#if defined(EMBREE_TARGET_SIMD8)
- std::unique_ptr<BVH8Factory> bvh8_factory;
-#endif
-
-#if USE_TASK_ARENA
- std::unique_ptr<tbb::task_arena> arena;
-#endif
-
- /* ray streams filter */
- RayStreamFilterFuncs rayStreamFilters;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.cpp b/thirdparty/embree-aarch64/kernels/common/geometry.cpp
deleted file mode 100644
index b3aa8e3396..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/geometry.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "geometry.h"
-#include "scene.h"
-
-namespace embree
-{
- const char* Geometry::gtype_names[Geometry::GTY_END] =
- {
- "flat_linear_curve",
- "round_linear_curve",
- "oriented_linear_curve",
- "",
- "flat_bezier_curve",
- "round_bezier_curve",
- "oriented_bezier_curve",
- "",
- "flat_bspline_curve",
- "round_bspline_curve",
- "oriented_bspline_curve",
- "",
- "flat_hermite_curve",
- "round_hermite_curve",
- "oriented_hermite_curve",
- "",
- "flat_catmull_rom_curve",
- "round_catmull_rom_curve",
- "oriented_catmull_rom_curve",
- "",
- "triangles",
- "quads",
- "grid",
- "subdivs",
- "",
- "sphere",
- "disc",
- "oriented_disc",
- "",
- "usergeom",
- "instance_cheap",
- "instance_expensive",
- };
-
- Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps)
- : device(device), userPtr(nullptr),
- numPrimitives(numPrimitives), numTimeSteps(unsigned(numTimeSteps)), fnumTimeSegments(float(numTimeSteps-1)), time_range(0.0f,1.0f),
- mask(-1),
- gtype(gtype),
- gsubtype(GTY_SUBTYPE_DEFAULT),
- quality(RTC_BUILD_QUALITY_MEDIUM),
- state((unsigned)State::MODIFIED),
- enabled(true),
- intersectionFilterN(nullptr), occlusionFilterN(nullptr), pointQueryFunc(nullptr)
- {
- device->refInc();
- }
-
- Geometry::~Geometry()
- {
- device->refDec();
- }
-
- void Geometry::setNumPrimitives(unsigned int numPrimitives_in)
- {
- if (numPrimitives_in == numPrimitives) return;
-
- numPrimitives = numPrimitives_in;
-
- Geometry::update();
- }
-
- void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in)
- {
- if (numTimeSteps_in == numTimeSteps) {
- return;
- }
-
- numTimeSteps = numTimeSteps_in;
- fnumTimeSegments = float(numTimeSteps_in-1);
-
- Geometry::update();
- }
-
- void Geometry::setTimeRange (const BBox1f range)
- {
- time_range = range;
- Geometry::update();
- }
-
- void Geometry::update()
- {
- ++modCounter_; // FIXME: required?
- state = (unsigned)State::MODIFIED;
- }
-
- void Geometry::commit()
- {
- ++modCounter_;
- state = (unsigned)State::COMMITTED;
- }
-
- void Geometry::preCommit()
- {
- if (State::MODIFIED == (State)state)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed");
- }
-
- void Geometry::postCommit()
- {
- }
-
- void Geometry::enable ()
- {
- if (isEnabled())
- return;
-
- enabled = true;
- ++modCounter_;
- }
-
- void Geometry::disable ()
- {
- if (isDisabled())
- return;
-
- enabled = false;
- ++modCounter_;
- }
-
- void Geometry::setUserData (void* ptr)
- {
- userPtr = ptr;
- }
-
- void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter)
- {
- if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
-
- intersectionFilterN = filter;
- }
-
- void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter)
- {
- if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
-
- occlusionFilterN = filter;
- }
-
- void Geometry::setPointQueryFunction (RTCPointQueryFunction func)
- {
- pointQueryFunc = func;
- }
-
- void Geometry::interpolateN(const RTCInterpolateNArguments* const args)
- {
- const void* valid_i = args->valid;
- const unsigned* primIDs = args->primIDs;
- const float* u = args->u;
- const float* v = args->v;
- unsigned int N = args->N;
- RTCBufferType bufferType = args->bufferType;
- unsigned int bufferSlot = args->bufferSlot;
- float* P = args->P;
- float* dPdu = args->dPdu;
- float* dPdv = args->dPdv;
- float* ddPdudu = args->ddPdudu;
- float* ddPdvdv = args->ddPdvdv;
- float* ddPdudv = args->ddPdudv;
- unsigned int valueCount = args->valueCount;
-
- if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex");
- const int* valid = (const int*) valid_i;
-
- __aligned(64) float P_tmp[256];
- __aligned(64) float dPdu_tmp[256];
- __aligned(64) float dPdv_tmp[256];
- __aligned(64) float ddPdudu_tmp[256];
- __aligned(64) float ddPdvdv_tmp[256];
- __aligned(64) float ddPdudv_tmp[256];
-
- float* Pt = P ? P_tmp : nullptr;
- float* dPdut = nullptr, *dPdvt = nullptr;
- if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; }
- float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr;
- if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; }
-
- for (unsigned int i=0; i<N; i++)
- {
- if (valid && !valid[i]) continue;
-
- RTCInterpolateArguments iargs;
- iargs.primID = primIDs[i];
- iargs.u = u[i];
- iargs.v = v[i];
- iargs.bufferType = bufferType;
- iargs.bufferSlot = bufferSlot;
- iargs.P = Pt;
- iargs.dPdu = dPdut;
- iargs.dPdv = dPdvt;
- iargs.ddPdudu = ddPdudut;
- iargs.ddPdvdv = ddPdvdvt;
- iargs.ddPdudv = ddPdudvt;
- iargs.valueCount = valueCount;
- interpolate(&iargs);
-
- if (likely(P)) {
- for (unsigned int j=0; j<valueCount; j++)
- P[j*N+i] = Pt[j];
- }
- if (likely(dPdu))
- {
- for (unsigned int j=0; j<valueCount; j++) {
- dPdu[j*N+i] = dPdut[j];
- dPdv[j*N+i] = dPdvt[j];
- }
- }
- if (likely(ddPdudu))
- {
- for (unsigned int j=0; j<valueCount; j++) {
- ddPdudu[j*N+i] = ddPdudut[j];
- ddPdvdv[j*N+i] = ddPdvdvt[j];
- ddPdudv[j*N+i] = ddPdudvt[j];
- }
- }
- }
- }
-
- bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context)
- {
- assert(context->primID < size());
-
- RTCPointQueryFunctionArguments args;
- args.query = (RTCPointQuery*)context->query_ws;
- args.userPtr = context->userPtr;
- args.primID = context->primID;
- args.geomID = context->geomID;
- args.context = context->userContext;
- args.similarityScale = context->similarityScale;
-
- bool update = false;
- if(context->func) update |= context->func(&args);
- if(pointQueryFunc) update |= pointQueryFunc(&args);
-
- if (update && context->userContext->instStackSize > 0)
- {
- // update point query
- if (context->query_type == POINT_QUERY_TYPE_AABB) {
- context->updateAABB();
- } else {
- assert(context->similarityScale > 0.f);
- query->radius = context->query_ws->radius * context->similarityScale;
- }
- }
- return update;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.h b/thirdparty/embree-aarch64/kernels/common/geometry.h
deleted file mode 100644
index 953974bfd2..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/geometry.h
+++ /dev/null
@@ -1,582 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "buffer.h"
-#include "../common/point_query.h"
-#include "../builders/priminfo.h"
-
-namespace embree
-{
- class Scene;
- class Geometry;
-
- struct GeometryCounts
- {
- __forceinline GeometryCounts()
- : numFilterFunctions(0),
- numTriangles(0), numMBTriangles(0),
- numQuads(0), numMBQuads(0),
- numBezierCurves(0), numMBBezierCurves(0),
- numLineSegments(0), numMBLineSegments(0),
- numSubdivPatches(0), numMBSubdivPatches(0),
- numUserGeometries(0), numMBUserGeometries(0),
- numInstancesCheap(0), numMBInstancesCheap(0),
- numInstancesExpensive(0), numMBInstancesExpensive(0),
- numGrids(0), numMBGrids(0),
- numPoints(0), numMBPoints(0) {}
-
- __forceinline size_t size() const {
- return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numGrids + numPoints
- + numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBGrids + numMBPoints;
- }
-
- __forceinline unsigned int enabledGeometryTypesMask() const
- {
- unsigned int mask = 0;
- if (numTriangles) mask |= 1 << 0;
- if (numQuads) mask |= 1 << 1;
- if (numBezierCurves+numLineSegments) mask |= 1 << 2;
- if (numSubdivPatches) mask |= 1 << 3;
- if (numUserGeometries) mask |= 1 << 4;
- if (numInstancesCheap) mask |= 1 << 5;
- if (numInstancesExpensive) mask |= 1 << 6;
- if (numGrids) mask |= 1 << 7;
- if (numPoints) mask |= 1 << 8;
-
- unsigned int maskMB = 0;
- if (numMBTriangles) maskMB |= 1 << 0;
- if (numMBQuads) maskMB |= 1 << 1;
- if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2;
- if (numMBSubdivPatches) maskMB |= 1 << 3;
- if (numMBUserGeometries) maskMB |= 1 << 4;
- if (numMBInstancesCheap) maskMB |= 1 << 5;
- if (numMBInstancesExpensive) maskMB |= 1 << 6;
- if (numMBGrids) maskMB |= 1 << 7;
- if (numMBPoints) maskMB |= 1 << 8;
-
- return (mask<<8) + maskMB;
- }
-
- __forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const
- {
- GeometryCounts ret;
- ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions;
- ret.numTriangles = numTriangles + rhs.numTriangles;
- ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles;
- ret.numQuads = numQuads + rhs.numQuads;
- ret.numMBQuads = numMBQuads + rhs.numMBQuads;
- ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves;
- ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves;
- ret.numLineSegments = numLineSegments + rhs.numLineSegments;
- ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments;
- ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches;
- ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches;
- ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries;
- ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries;
- ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap;
- ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap;
- ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive;
- ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive;
- ret.numGrids = numGrids + rhs.numGrids;
- ret.numMBGrids = numMBGrids + rhs.numMBGrids;
- ret.numPoints = numPoints + rhs.numPoints;
- ret.numMBPoints = numMBPoints + rhs.numMBPoints;
-
- return ret;
- }
-
- size_t numFilterFunctions; //!< number of geometries with filter functions enabled
- size_t numTriangles; //!< number of enabled triangles
- size_t numMBTriangles; //!< number of enabled motion blured triangles
- size_t numQuads; //!< number of enabled quads
- size_t numMBQuads; //!< number of enabled motion blurred quads
- size_t numBezierCurves; //!< number of enabled curves
- size_t numMBBezierCurves; //!< number of enabled motion blurred curves
- size_t numLineSegments; //!< number of enabled line segments
- size_t numMBLineSegments; //!< number of enabled line motion blurred segments
- size_t numSubdivPatches; //!< number of enabled subdivision patches
- size_t numMBSubdivPatches; //!< number of enabled motion blured subdivision patches
- size_t numUserGeometries; //!< number of enabled user geometries
- size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
- size_t numInstancesCheap; //!< number of enabled cheap instances
- size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances
- size_t numInstancesExpensive; //!< number of enabled expensive instances
- size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances
- size_t numGrids; //!< number of enabled grid geometries
- size_t numMBGrids; //!< number of enabled motion blurred grid geometries
- size_t numPoints; //!< number of enabled points
- size_t numMBPoints; //!< number of enabled motion blurred points
- };
-
- /*! Base class all geometries are derived from */
- class Geometry : public RefCount
- {
- friend class Scene;
- public:
-
- /*! type of geometry */
- enum GType
- {
- GTY_FLAT_LINEAR_CURVE = 0,
- GTY_ROUND_LINEAR_CURVE = 1,
- GTY_ORIENTED_LINEAR_CURVE = 2,
- GTY_CONE_LINEAR_CURVE = 3,
-
- GTY_FLAT_BEZIER_CURVE = 4,
- GTY_ROUND_BEZIER_CURVE = 5,
- GTY_ORIENTED_BEZIER_CURVE = 6,
-
- GTY_FLAT_BSPLINE_CURVE = 8,
- GTY_ROUND_BSPLINE_CURVE = 9,
- GTY_ORIENTED_BSPLINE_CURVE = 10,
-
- GTY_FLAT_HERMITE_CURVE = 12,
- GTY_ROUND_HERMITE_CURVE = 13,
- GTY_ORIENTED_HERMITE_CURVE = 14,
-
- GTY_FLAT_CATMULL_ROM_CURVE = 16,
- GTY_ROUND_CATMULL_ROM_CURVE = 17,
- GTY_ORIENTED_CATMULL_ROM_CURVE = 18,
-
- GTY_TRIANGLE_MESH = 20,
- GTY_QUAD_MESH = 21,
- GTY_GRID_MESH = 22,
- GTY_SUBDIV_MESH = 23,
-
- GTY_SPHERE_POINT = 25,
- GTY_DISC_POINT = 26,
- GTY_ORIENTED_DISC_POINT = 27,
-
- GTY_USER_GEOMETRY = 29,
- GTY_INSTANCE_CHEAP = 30,
- GTY_INSTANCE_EXPENSIVE = 31,
- GTY_END = 32,
-
- GTY_BASIS_LINEAR = 0,
- GTY_BASIS_BEZIER = 4,
- GTY_BASIS_BSPLINE = 8,
- GTY_BASIS_HERMITE = 12,
- GTY_BASIS_CATMULL_ROM = 16,
- GTY_BASIS_MASK = 28,
-
- GTY_SUBTYPE_FLAT_CURVE = 0,
- GTY_SUBTYPE_ROUND_CURVE = 1,
- GTY_SUBTYPE_ORIENTED_CURVE = 2,
- GTY_SUBTYPE_MASK = 3,
- };
-
- enum GSubType
- {
- GTY_SUBTYPE_DEFAULT= 0,
- GTY_SUBTYPE_INSTANCE_LINEAR = 0,
- GTY_SUBTYPE_INSTANCE_QUATERNION = 1
- };
-
- enum GTypeMask
- {
- MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE,
- MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE,
- MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE,
- MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE,
-
- MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE,
- MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE,
- MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE,
-
- MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE,
- MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE,
- MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE,
-
- MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE,
- MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE,
- MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE,
-
- MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE,
- MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE,
- MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE,
-
- MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE,
-
- MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE |
- MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE |
- MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE |
- MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE,
-
- MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT,
- MTY_DISC_POINT = 1ul << GTY_DISC_POINT,
- MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT,
-
- MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT,
-
- MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS,
-
- MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH,
- MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH,
- MTY_GRID_MESH = 1ul << GTY_GRID_MESH,
- MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH,
- MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY,
-
- MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP,
- MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE,
- MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE
- };
-
- static const char* gtype_names[GTY_END];
-
- enum class State : unsigned {
- MODIFIED = 0,
- COMMITTED = 1,
- };
-
- public:
-
- /*! Geometry constructor */
- Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps);
-
- /*! Geometry destructor */
- virtual ~Geometry();
-
- public:
-
- /*! tests if geometry is enabled */
- __forceinline bool isEnabled() const { return enabled; }
-
- /*! tests if geometry is disabled */
- __forceinline bool isDisabled() const { return !isEnabled(); }
-
- /*! tests if that geometry has some filter function set */
- __forceinline bool hasFilterFunctions () const {
- return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr);
- }
-
- /*! returns geometry type */
- __forceinline GType getType() const { return gtype; }
-
- /*! returns curve type */
- __forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); }
-
- /*! returns curve basis */
- __forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); }
-
- /*! returns geometry type mask */
- __forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); }
-
- /*! returns number of primitives */
- __forceinline size_t size() const { return numPrimitives; }
-
- /*! sets the number of primitives */
- virtual void setNumPrimitives(unsigned int numPrimitives_in);
-
- /*! sets number of time steps */
- virtual void setNumTimeSteps (unsigned int numTimeSteps_in);
-
- /*! sets motion blur time range */
- void setTimeRange (const BBox1f range);
-
- /*! sets number of vertex attributes */
- virtual void setVertexAttributeCount (unsigned int N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! sets number of topologies */
- virtual void setTopologyCount (unsigned int N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! sets the build quality */
- void setBuildQuality(RTCBuildQuality quality_in)
- {
- this->quality = quality_in;
- Geometry::update();
- }
-
- /* calculate time segment itime and fractional time ftime */
- __forceinline int timeSegment(float time, float& ftime) const {
- return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime);
- }
-
- template<int N>
- __forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const {
- return getTimeSegment(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime);
- }
-
- /* calculate overlapping time segment range */
- __forceinline range<int> timeSegmentRange(const BBox1f& range) const {
- return getTimeSegmentRange(range,time_range,fnumTimeSegments);
- }
-
- /* returns time that corresponds to time step */
- __forceinline float timeStep(const int i) const {
- assert(i>=0 && i<(int)numTimeSteps);
- return time_range.lower + time_range.size()*float(i)/fnumTimeSegments;
- }
-
- /*! for all geometries */
- public:
-
- /*! Enable geometry. */
- virtual void enable();
-
- /*! Update geometry. */
- void update();
-
- /*! commit of geometry */
- virtual void commit();
-
- /*! Update geometry buffer. */
- virtual void updateBuffer(RTCBufferType type, unsigned int slot) {
- update(); // update everything for geometries not supporting this call
- }
-
- /*! Disable geometry. */
- virtual void disable();
-
- /*! Verify the geometry */
- virtual bool verify() { return true; }
-
- /*! called before every build */
- virtual void preCommit();
-
- /*! called after every build */
- virtual void postCommit();
-
- virtual void addElementsToCount (GeometryCounts & counts) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- };
-
- /*! sets constant tessellation rate for the geometry */
- virtual void setTessellationRate(float N) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets the maximal curve radius scale allowed by min-width feature. */
- virtual void setMaxRadiusScale(float s) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set user data pointer. */
- virtual void setUserData(void* ptr);
-
- /*! Get user data pointer. */
- __forceinline void* getUserData() const {
- return userPtr;
- }
-
- /*! interpolates user data to the specified u/v location */
- virtual void interpolate(const RTCInterpolateArguments* const args) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! interpolates user data to the specified u/v locations */
- virtual void interpolateN(const RTCInterpolateNArguments* const args);
-
- /* point query api */
- bool pointQuery(PointQuery* query, PointQueryContext* context);
-
- /*! for subdivision surfaces only */
- public:
- virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set displacement function. */
- virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getFirstHalfEdge(unsigned int faceID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getFace(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getNextHalfEdge(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! get fast access to first vertex buffer if applicable */
- virtual float * getCompactVertexArray () const {
- return nullptr;
- }
-
- /*! Returns the modified counter - how many times the geo has been modified */
- __forceinline unsigned int getModCounter () const {
- return modCounter_;
- }
-
- /*! for triangle meshes and bezier curves only */
- public:
-
-
- /*! Sets ray mask. */
- virtual void setMask(unsigned mask) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets specified buffer. */
- virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Gets specified buffer. */
- virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set intersection filter function for ray packets of size N. */
- virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN);
-
- /*! Set occlusion filter function for ray packets of size N. */
- virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN);
-
- /*! for instances only */
- public:
-
- /*! Sets the instanced scene */
- virtual void setInstancedScene(const Ref<Scene>& scene) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets transformation of the instance */
- virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Sets transformation of the instance */
- virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Returns the transformation of the instance */
- virtual AffineSpace3fa getTransform(float time) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! for user geometries only */
- public:
-
- /*! Set bounds function. */
- virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set intersect function for ray packets of size N. */
- virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set occlusion function for ray packets of size N. */
- virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
- }
-
- /*! Set point query function. */
- void setPointQueryFunction(RTCPointQueryFunction func);
-
- /*! returns number of time segments */
- __forceinline unsigned numTimeSegments () const {
- return numTimeSteps-1;
- }
-
- public:
-
- virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry");
- }
-
- virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
- }
-
- virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
- }
-
- virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
- }
-
- virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
- }
-
- virtual Vec3fa computeDirection(unsigned int primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
- }
-
- virtual Vec3fa computeDirection(unsigned int primID, size_t time) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
- }
-
- public:
- __forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; }
- __forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; }
-
- public:
- Device* device; //!< device this geometry belongs to
-
- void* userPtr; //!< user pointer
- unsigned int numPrimitives; //!< number of primitives of this geometry
-
- unsigned int numTimeSteps; //!< number of time steps
- float fnumTimeSegments; //!< number of time segments (precalculation)
- BBox1f time_range; //!< motion blur time range
-
- unsigned int mask; //!< for masking out geometry
- unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified
-
- struct {
- GType gtype : 8; //!< geometry type
- GSubType gsubtype : 8; //!< geometry subtype
- RTCBuildQuality quality : 3; //!< build quality for geometry
- unsigned state : 2;
- bool enabled : 1; //!< true if geometry is enabled
- };
-
- RTCFilterFunctionN intersectionFilterN;
- RTCFilterFunctionN occlusionFilterN;
- RTCPointQueryFunction pointQueryFunc;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/hit.h b/thirdparty/embree-aarch64/kernels/common/hit.h
deleted file mode 100644
index 32a198cdfe..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/hit.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "ray.h"
-#include "instance_stack.h"
-
-namespace embree
-{
- /* Hit structure for K hits */
- template<int K>
- struct HitK
- {
- /* Default construction does nothing */
- __forceinline HitK() {}
-
- /* Constructs a hit */
- __forceinline HitK(const RTCIntersectContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng)
- : Ng(Ng), u(u), v(v), primID(primID), geomID(geomID)
- {
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = RTC_INVALID_GEOMETRY_ID;
- instance_id_stack::copy(context->instID, instID);
- }
-
- /* Returns the size of the hit */
- static __forceinline size_t size() { return K; }
-
- public:
- Vec3vf<K> Ng; // geometry normal
- vfloat<K> u; // barycentric u coordinate of hit
- vfloat<K> v; // barycentric v coordinate of hit
- vuint<K> primID; // primitive ID
- vuint<K> geomID; // geometry ID
- vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Specialization for a single hit */
- template<>
- struct __aligned(16) HitK<1>
- {
- /* Default construction does nothing */
- __forceinline HitK() {}
-
- /* Constructs a hit */
- __forceinline HitK(const RTCIntersectContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng)
- : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID)
- {
- instance_id_stack::copy(context->instID, instID);
- }
-
- /* Returns the size of the hit */
- static __forceinline size_t size() { return 1; }
-
- public:
- Vec3<float> Ng; // geometry normal
- float u; // barycentric u coordinate of hit
- float v; // barycentric v coordinate of hit
- unsigned int primID; // primitive ID
- unsigned int geomID; // geometry ID
- unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Shortcuts */
- typedef HitK<1> Hit;
- typedef HitK<4> Hit4;
- typedef HitK<8> Hit8;
- typedef HitK<16> Hit16;
-
- /* Outputs hit to stream */
- template<int K>
- __forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray)
- {
- cout << "{ " << embree_endl
- << " Ng = " << ray.Ng << embree_endl
- << " u = " << ray.u << embree_endl
- << " v = " << ray.v << embree_endl
- << " primID = " << ray.primID << embree_endl
- << " geomID = " << ray.geomID << embree_endl
- << " instID =";
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- {
- cout << " " << ray.instID[l];
- }
- cout << embree_endl;
- return cout << "}";
- }
-
- template<typename Hit>
- __forceinline void copyHitToRay(RayHit& ray, const Hit& hit)
- {
- ray.Ng = hit.Ng;
- ray.u = hit.u;
- ray.v = hit.v;
- ray.primID = hit.primID;
- ray.geomID = hit.geomID;
- instance_id_stack::copy(hit.instID, ray.instID);
- }
-
- template<int K>
- __forceinline void copyHitToRay(const vbool<K> &mask, RayHitK<K> &ray, const HitK<K> &hit)
- {
- vfloat<K>::storeu(mask,&ray.Ng.x, hit.Ng.x);
- vfloat<K>::storeu(mask,&ray.Ng.y, hit.Ng.y);
- vfloat<K>::storeu(mask,&ray.Ng.z, hit.Ng.z);
- vfloat<K>::storeu(mask,&ray.u, hit.u);
- vfloat<K>::storeu(mask,&ray.v, hit.v);
- vuint<K>::storeu(mask,&ray.primID, hit.primID);
- vuint<K>::storeu(mask,&ray.geomID, hit.geomID);
- instance_id_stack::copy(hit.instID, ray.instID, mask);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/instance_stack.h b/thirdparty/embree-aarch64/kernels/common/instance_stack.h
deleted file mode 100644
index d7e3637f7b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/instance_stack.h
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "rtcore.h"
-
-namespace embree {
-namespace instance_id_stack {
-
-static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0,
- "RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0.");
-
-/*******************************************************************************
- * Instance ID stack manipulation.
- * This is used from the instance intersector.
- ******************************************************************************/
-
-/*
- * Push an instance to the stack.
- */
-RTC_FORCEINLINE bool push(RTCIntersectContext* context,
- unsigned instanceId)
-{
-#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
- const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
- /* We assert here because instances are silently dropped when the stack is full.
- This might be quite hard to find in production. */
- assert(spaceAvailable);
- if (likely(spaceAvailable))
- context->instID[context->instStackSize++] = instanceId;
- return spaceAvailable;
-#else
- const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID);
- assert(spaceAvailable);
- if (likely(spaceAvailable))
- context->instID[0] = instanceId;
- return spaceAvailable;
-#endif
-}
-
-
-/*
- * Pop the last instance pushed to the stack.
- * Do not call on an empty stack.
- */
-RTC_FORCEINLINE void pop(RTCIntersectContext* context)
-{
- assert(context);
-#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
- assert(context->instStackSize > 0);
- context->instID[--context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
-#else
- assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
- context->instID[0] = RTC_INVALID_GEOMETRY_ID;
-#endif
-}
-
-/*******************************************************************************
- * Optimized instance id stack copy.
- * The copy() function at the bottom of this block will either copy full
- * stacks or copy only until the last valid element has been copied, depending
- * on RTC_MAX_INSTANCE_LEVEL_COUNT.
- ******************************************************************************/
-
-/*
- * Plain array assignment. This works for scalar->scalar,
- * scalar->vector, and vector->vector.
- */
-template <class Src, class Tgt>
-RTC_FORCEINLINE void level_copy(unsigned level, Src* src, Tgt* tgt)
-{
- tgt[level] = src[level];
-}
-
-/*
- * Masked SIMD vector->vector store.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask)
-{
- vuint<K>::storeu(mask, tgt + level, src[level]);
-}
-
-/*
- * Masked scalar->SIMD vector store.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const vbool<K>& mask)
-{
- vuint<K>::store(mask, tgt + level, src[level]);
-}
-
-/*
- * Indexed assign from vector to scalar.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, unsigned* tgt, const size_t& idx)
-{
- tgt[level] = src[level][idx];
-}
-
-/*
- * Indexed assign from scalar to vector.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const size_t& idx)
-{
- tgt[level][idx] = src[level];
-}
-
-/*
- * Indexed assign from vector to vector.
- */
-template <int K>
-RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const size_t& i, const size_t& j)
-{
- tgt[level][j] = src[level][i];
-}
-
-/*
- * Check if the given stack level is valid.
- * These are only used for large max stack sizes.
- */
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const size_t& /*i*/)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const vbool<K>& /*mask*/)
-{
- return stack[level] != RTC_INVALID_GEOMETRY_ID;
-}
-
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack)
-{
- return any(stack[level] != RTC_INVALID_GEOMETRY_ID);
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const vbool<K>& mask)
-{
- return any(mask & (stack[level] != RTC_INVALID_GEOMETRY_ID));
-}
-
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i)
-{
- return stack[level][i] != RTC_INVALID_GEOMETRY_ID;
-}
-template <int K>
-RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i, const size_t& /*j*/)
-{
- return stack[level][i] != RTC_INVALID_GEOMETRY_ID;
-}
-
-/*
- * Copy an instance ID stack.
- *
- * This function automatically selects a LevelFunctor from the above Assign
- * structs.
- */
-template <class Src, class Tgt, class... Args>
-RTC_FORCEINLINE void copy(Src src, Tgt tgt, Args&&... args)
-{
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
- /*
- * Avoid all loops for only one level.
- */
- level_copy(0, src, tgt, std::forward<Args>(args)...);
-
-#elif (RTC_MAX_INSTANCE_LEVEL_COUNT <= 4)
- /*
- * It is faster to avoid the valid test for low level counts.
- * Just copy the whole stack.
- */
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- level_copy(l, src, tgt, std::forward<Args>(args)...);
-
-#else
- /*
- * For general stack sizes, it pays off to test for validity.
- */
- bool valid = true;
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT && valid; ++l)
- {
- level_copy(l, src, tgt, std::forward<Args>(args)...);
- valid = level_valid(l, src, std::forward<Args>(args)...);
- }
-#endif
-}
-
-} // namespace instance_id_stack
-} // namespace embree
-
diff --git a/thirdparty/embree-aarch64/kernels/common/isa.h b/thirdparty/embree-aarch64/kernels/common/isa.h
deleted file mode 100644
index 63fb8d3351..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/isa.h
+++ /dev/null
@@ -1,271 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/sys/platform.h"
-#include "../../common/sys/sysinfo.h"
-
-namespace embree
-{
-#define DEFINE_SYMBOL2(type,name) \
- typedef type (*name##Func)(); \
- name##Func name;
-
-#define DECLARE_SYMBOL2(type,name) \
- namespace sse2 { extern type name(); } \
- namespace sse42 { extern type name(); } \
- namespace avx { extern type name(); } \
- namespace avx2 { extern type name(); } \
- namespace avx512knl { extern type name(); } \
- namespace avx512skx { extern type name(); } \
- void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \
- type name##_error() { return type(name##_error2); } \
- type name##_zero() { return type(nullptr); }
-
-#define DECLARE_ISA_FUNCTION(type,symbol,args) \
- namespace sse2 { extern type symbol(args); } \
- namespace sse42 { extern type symbol(args); } \
- namespace avx { extern type symbol(args); } \
- namespace avx2 { extern type symbol(args); } \
- namespace avx512knl { extern type symbol(args); } \
- namespace avx512skx { extern type symbol(args); } \
- inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \
- typedef type (*symbol##Ty)(args); \
-
-#define DEFINE_ISA_FUNCTION(type,symbol,args) \
- typedef type (*symbol##Func)(args); \
- symbol##Func symbol;
-
-#define ZERO_SYMBOL(features,intersector) \
- intersector = intersector##_zero;
-
-#define INIT_SYMBOL(features,intersector) \
- intersector = decltype(intersector)(intersector##_error);
-
-#define SELECT_SYMBOL_DEFAULT(features,intersector) \
- intersector = isa::intersector;
-
-#if defined(__SSE__) || defined(__ARM_NEON)
-#if !defined(EMBREE_TARGET_SIMD4)
-#define EMBREE_TARGET_SIMD4
-#endif
-#endif
-
-#if defined(EMBREE_TARGET_SSE42)
-#define SELECT_SYMBOL_SSE42(features,intersector) \
- if ((features & SSE42) == SSE42) intersector = sse42::intersector;
-#else
-#define SELECT_SYMBOL_SSE42(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX) || defined(__AVX__)
-#if !defined(EMBREE_TARGET_SIMD8)
-#define EMBREE_TARGET_SIMD8
-#endif
-#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target
-#define SELECT_SYMBOL_AVX(features,intersector) \
- if ((features & ISA) == ISA) intersector = isa::intersector;
-#else
-#define SELECT_SYMBOL_AVX(features,intersector) \
- if ((features & AVX) == AVX) intersector = avx::intersector;
-#endif
-#else
-#define SELECT_SYMBOL_AVX(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX2)
-#if !defined(EMBREE_TARGET_SIMD8)
-#define EMBREE_TARGET_SIMD8
-#endif
-#define SELECT_SYMBOL_AVX2(features,intersector) \
- if ((features & AVX2) == AVX2) intersector = avx2::intersector;
-#else
-#define SELECT_SYMBOL_AVX2(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX512KNL)
-#if !defined(EMBREE_TARGET_SIMD16)
-#define EMBREE_TARGET_SIMD16
-#endif
-#define SELECT_SYMBOL_AVX512KNL(features,intersector) \
- if ((features & AVX512KNL) == AVX512KNL) intersector = avx512knl::intersector;
-#else
-#define SELECT_SYMBOL_AVX512KNL(features,intersector)
-#endif
-
-#if defined(EMBREE_TARGET_AVX512SKX)
-#if !defined(EMBREE_TARGET_SIMD16)
-#define EMBREE_TARGET_SIMD16
-#endif
-#define SELECT_SYMBOL_AVX512SKX(features,intersector) \
- if ((features & AVX512SKX) == AVX512SKX) intersector = avx512skx::intersector;
-#else
-#define SELECT_SYMBOL_AVX512SKX(features,intersector)
-#endif
-
-#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- ZERO_SYMBOL(features,intersector); \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \
- SELECT_SYMBOL_DEFAULT(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,intersector) \
- INIT_SYMBOL(features,intersector); \
- SELECT_SYMBOL_AVX512KNL(features,intersector); \
- SELECT_SYMBOL_AVX512SKX(features,intersector);
-
-#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \
- SELECT_SYMBOL_SSE42(features,intersector); \
- SELECT_SYMBOL_AVX(features,intersector); \
- SELECT_SYMBOL_AVX2(features,intersector);
-
- struct VerifyMultiTargetLinking {
- static __noinline int getISA(int depth = 5) {
- if (depth == 0) return ISA;
- else return getISA(depth-1);
- }
- };
- namespace sse2 { int getISA(); };
- namespace sse42 { int getISA(); };
- namespace avx { int getISA(); };
- namespace avx2 { int getISA(); };
- namespace avx512knl { int getISA(); };
- namespace avx512skx { int getISA(); };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h b/thirdparty/embree-aarch64/kernels/common/motion_derivative.h
deleted file mode 100644
index 82953f0e89..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h
+++ /dev/null
@@ -1,325 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../common/math/affinespace.h"
-#include "../../common/math/interval.h"
-
-#include <functional>
-
-namespace embree {
-
-#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f
-
-static void motion_derivative_coefficients(const float *p, float *coeff);
-
-struct MotionDerivativeCoefficients
-{
- float theta;
- float coeffs[3*8*7];
-
- MotionDerivativeCoefficients() {}
-
- // xfm0 and xfm1 are interpret as quaternion decomposition
- MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1)
- {
- // cosTheta of the two quaternions
- const float cosTheta = min(1.f, max(-1.f,
- xfm0.l.vx.w * xfm1.l.vx.w
- + xfm0.l.vy.w * xfm1.l.vy.w
- + xfm0.l.vz.w * xfm1.l.vz.w
- + xfm0.p.w * xfm1.p.w));
-
- theta = std::acos(cosTheta);
- Vec4f qperp(xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w);
- if (cosTheta < 0.995f) {
- // compute perpendicular quaternion
- qperp.x = xfm1.p.w - cosTheta * xfm0.p.w;
- qperp.y = xfm1.l.vx.w - cosTheta * xfm0.l.vx.w;
- qperp.z = xfm1.l.vy.w - cosTheta * xfm0.l.vy.w;
- qperp.w = xfm1.l.vz.w - cosTheta * xfm0.l.vz.w;
- qperp = normalize(qperp);
- }
- const float p[33] = {
- theta,
- xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z, // translation component of xfm0
- xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z, // translation component of xfm1
- xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w, // quaternion of xfm0
- qperp.x, qperp.y, qperp.z, qperp.w,
- xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x, // scale/skew component of xfm0
- xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y,
- xfm0.l.vz.z, xfm0.p.z,
- xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x, // scale/skew component of xfm1
- xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y,
- xfm1.l.vz.z, xfm1.p.z
- };
- motion_derivative_coefficients(p, coeffs);
- }
-};
-
-struct MotionDerivative
-{
- float twoTheta;
- float c[8];
-
- MotionDerivative(MotionDerivativeCoefficients const& mdc,
- int dim, Vec3fa const& p0, Vec3fa const& p1)
- : twoTheta(2.f*mdc.theta)
- {
- const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z };
- for (int i = 0; i < 8; ++i) {
- c[i] = 0;
- for (int j = 0; j < 7; ++j) {
- c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j];
- }
- }
- }
-
- template<typename T>
- struct EvalMotionDerivative
- {
- MotionDerivative const& md;
- float offset;
-
- EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {}
-
- T operator()(T const& time) const {
- return md.c[0] + md.c[1] * time
- + (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time)
- + (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time)
- + offset;
- }
- };
-
- unsigned int findRoots(
- Interval1f const& interval,
- float offset,
- float* roots,
- unsigned int maxNumRoots)
- {
- unsigned int numRoots = 0;
- EvalMotionDerivative<Interval1f> eval(*this, offset);
- findRoots(eval, interval, numRoots, roots, maxNumRoots);
- return numRoots;
- }
-
- template<typename Eval>
- static void findRoots(
-
- Eval const& eval,
- Interval1f const& interval,
- unsigned int& numRoots,
- float* roots,
- unsigned int maxNumRoots)
- {
- Interval1f range = eval(interval);
- if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return;
-
- const float split = 0.5f * (interval.upper + interval.lower);
- if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f)
- {
- // check if the root already exists
- for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) {
- if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON)
- return;
- }
- if (numRoots < maxNumRoots) {
- roots[numRoots++] = split;
- }
- if (numRoots > maxNumRoots) {
- printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS
- return;
- }
- return;
- }
-
- findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots);
- findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots);
- }
-};
-
-/******************************************************************************
- * Code generated with sympy 1.4 *
- * See http://www.sympy.org/ for more information. *
- * *
- * see *
- * *
- * scripts/generate_motion_derivative_coefficients.py *
- * *
- * for how this code is generated *
- * *
- ******************************************************************************/
-static void motion_derivative_coefficients(const float *p, float *coeff)
-{
- coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27];
- coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24];
- coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25];
- coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26];
- coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15];
- coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16];
- coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17];
- coeff[7] = 0;
- coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24];
- coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25];
- coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26];
- coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24];
- coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25];
- coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26];
- coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27];
- coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24];
- coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25];
- coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26];
- coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15];
- coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16];
- coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17];
- coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0];
- coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24];
- coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25];
- coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26];
- coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24];
- coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25];
- coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26];
- coeff[28] = 0;
- coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0];
- coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0];
- coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0];
- coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0];
- coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0];
- coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0];
- coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27];
- coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24];
- coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25];
- coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26];
- coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15];
- coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16];
- coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17];
- coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0];
- coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24];
- coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25];
- coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26];
- coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24];
- coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25];
- coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26];
- coeff[49] = 0;
- coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0];
- coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0];
- coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0];
- coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0];
- coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0];
- coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0];
- coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30];
- coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24];
- coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28];
- coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29];
- coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15];
- coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19];
- coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20];
- coeff[63] = 0;
- coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
- coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28];
- coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29];
- coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
- coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28];
- coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29];
- coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30];
- coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24];
- coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28];
- coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29];
- coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15];
- coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19];
- coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20];
- coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0];
- coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
- coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28];
- coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29];
- coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
- coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28];
- coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29];
- coeff[84] = 0;
- coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0];
- coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0];
- coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0];
- coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0];
- coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0];
- coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0];
- coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30];
- coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24];
- coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28];
- coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29];
- coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15];
- coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19];
- coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20];
- coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0];
- coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24];
- coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28];
- coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29];
- coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24];
- coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28];
- coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29];
- coeff[105] = 0;
- coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0];
- coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0];
- coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0];
- coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0];
- coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0];
- coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0];
- coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32];
- coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24];
- coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28];
- coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31];
- coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15];
- coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19];
- coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22];
- coeff[119] = 0;
- coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
- coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
- coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31];
- coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
- coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
- coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31];
- coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30];
- coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24];
- coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28];
- coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29];
- coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15];
- coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19];
- coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20];
- coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0];
- coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
- coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
- coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29];
- coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
- coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
- coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29];
- coeff[140] = 0;
- coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0];
- coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0];
- coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0];
- coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0];
- coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0];
- coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0];
- coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32];
- coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24];
- coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28];
- coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31];
- coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15];
- coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19];
- coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22];
- coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0];
- coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24];
- coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28];
- coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31];
- coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24];
- coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28];
- coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31];
- coeff[161] = 0;
- coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0];
- coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0];
- coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0];
- coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0];
- coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0];
- coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0];
-}
-
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/common/point_query.h b/thirdparty/embree-aarch64/kernels/common/point_query.h
deleted file mode 100644
index 27d158ca3a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/point_query.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /* Point query structure for closest point query */
- template<int K>
- struct RTC_ALIGN(16) PointQueryK
- {
- /* Default construction does nothing */
- __forceinline PointQueryK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero)
- : p(p), time(time), radius(radius) {}
-
- /* Returns the size of the ray */
- static __forceinline size_t size() { return K; }
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline vbool<K> valid() const
- {
- const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE));
- const vbool<K> vn = radius >= vfloat<K>(0);
- const vbool<K> vf = abs(time) < vfloat<K>(inf);
- return vx & vy & vz & vn & vf;
- }
-
- __forceinline void get(PointQueryK<1>* ray) const;
- __forceinline void get(size_t i, PointQueryK<1>& ray) const;
- __forceinline void set(const PointQueryK<1>* ray);
- __forceinline void set(size_t i, const PointQueryK<1>& ray);
-
- Vec3vf<K> p; // location of the query point
- vfloat<K> time; // time for motion blur
- vfloat<K> radius; // radius for the point query
- };
-
- /* Specialization for a single point query */
- template<>
- struct RTC_ALIGN(16) PointQueryK<1>
- {
- /* Default construction does nothing */
- __forceinline PointQueryK() {}
-
- /* Constructs a ray from origin, direction, and ray segment. Near
- * has to be smaller than far */
- __forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero)
- : p(p), time(time), radius(radius) {}
-
- /* Calculates if this is a valid ray that does not cause issues during traversal */
- __forceinline bool valid() const {
- return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf);
- }
-
- Vec3f p;
- float time;
- float radius;
- };
-
- /* Converts point query packet to single point query */
- template<int K>
- __forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const
- {
- for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
- {
- query[i].p.x = p.x[i];
- query[i].p.y = p.y[i];
- query[i].p.z = p.z[i];
- query[i].time = time[i];
- query[i].radius = radius[i];
- }
- }
-
- /* Extracts a single point query out of a point query packet*/
- template<int K>
- __forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const
- {
- query.p.x = p.x[i];
- query.p.y = p.y[i];
- query.p.z = p.z[i];
- query.radius = radius[i];
- query.time = time[i];
- }
-
- /* Converts single point query to point query packet */
- template<int K>
- __forceinline void PointQueryK<K>::set(const PointQueryK<1>* query)
- {
- for (size_t i = 0; i < K; i++)
- {
- p.x[i] = query[i].p.x;
- p.y[i] = query[i].p.y;
- p.z[i] = query[i].p.z;
- radius[i] = query[i].radius;
- time[i] = query[i].time;
- }
- }
-
- /* inserts a single point query into a point query packet element */
- template<int K>
- __forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query)
- {
- p.x[i] = query.p.x;
- p.y[i] = query.p.y;
- p.z[i] = query.p.z;
- radius[i] = query.radius;
- time[i] = query.time;
- }
-
- /* Shortcuts */
- typedef PointQueryK<1> PointQuery;
- typedef PointQueryK<4> PointQuery4;
- typedef PointQueryK<8> PointQuery8;
- typedef PointQueryK<16> PointQuery16;
- struct PointQueryN;
-
- /* Outputs point query to stream */
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query)
- {
- cout << "{ " << embree_endl
- << " p = " << query.p << embree_endl
- << " r = " << query.radius << embree_endl
- << " time = " << query.time << embree_endl
- << "}";
- return cout;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/primref.h b/thirdparty/embree-aarch64/kernels/common/primref.h
deleted file mode 100644
index ce75c982bb..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/primref.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! A primitive reference stores the bounds of the primitive and its ID. */
- struct __aligned(32) PrimRef
- {
- __forceinline PrimRef () {}
-
-#if defined(__AVX__)
- __forceinline PrimRef(const PrimRef& v) {
- vfloat8::store((float*)this,vfloat8::load((float*)&v));
- }
- __forceinline PrimRef& operator=(const PrimRef& v) {
- vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this;
- }
-#endif
-
- __forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID)
- {
- lower = Vec3fx(bounds.lower, geomID);
- upper = Vec3fx(bounds.upper, primID);
- }
-
- __forceinline PrimRef (const BBox3fa& bounds, size_t id)
- {
-#if defined(__X86_64__) || defined(__aarch64__)
- lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF));
- upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF));
-#else
- lower = Vec3fx(bounds.lower, (unsigned)id);
- upper = Vec3fx(bounds.upper, (unsigned)0);
-#endif
- }
-
- /*! calculates twice the center of the primitive */
- __forceinline const Vec3fa center2() const {
- return lower+upper;
- }
-
- /*! return the bounding box of the primitive */
- __forceinline const BBox3fa bounds() const {
- return BBox3fa(lower,upper);
- }
-
- /*! size for bin heuristic is 1 */
- __forceinline unsigned size() const {
- return 1;
- }
-
- /*! returns bounds and centroid used for binning */
- __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
- {
- bounds_o = bounds();
- center_o = embree::center2(bounds_o);
- }
-
- __forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!!
- return lower.u;
- }
- __forceinline unsigned& primIDref() { // FIXME: remove !!!!!!!
- return upper.u;
- }
-
- /*! returns the geometry ID */
- __forceinline unsigned geomID() const {
- return lower.a;
- }
-
- /*! returns the primitive ID */
- __forceinline unsigned primID() const {
- return upper.a;
- }
-
- /*! returns an size_t sized ID */
- __forceinline size_t ID() const {
-#if defined(__X86_64__) || defined(__aarch64__)
- return size_t(lower.u) + (size_t(upper.u) << 32);
-#else
- return size_t(lower.u);
-#endif
- }
-
- /*! special function for operator< */
- __forceinline uint64_t ID64() const {
- return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
- }
-
- /*! allows sorting the primrefs by ID */
- friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) {
- return p0.ID64() < p1.ID64();
- }
-
- /*! Outputs primitive reference to a stream. */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) {
- return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }";
- }
-
- public:
- Vec3fx lower; //!< lower bounds and geomID
- Vec3fx upper; //!< upper bounds and primID
- };
-
- /*! fast exchange for PrimRefs */
- __forceinline void xchg(PrimRef& a, PrimRef& b)
- {
-#if defined(__AVX__)
- const vfloat8 aa = vfloat8::load((float*)&a);
- const vfloat8 bb = vfloat8::load((float*)&b);
- vfloat8::store((float*)&a,bb);
- vfloat8::store((float*)&b,aa);
-#else
- std::swap(a,b);
-#endif
- }
-
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
- /************************************************************************************/
-
- struct SubGridBuildData {
- unsigned short sx,sy;
- unsigned int primID;
-
- __forceinline SubGridBuildData() {};
- __forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID) : sx(sx), sy(sy), primID(primID) {};
-
- __forceinline size_t x() const { return (size_t)sx & 0x7fff; }
- __forceinline size_t y() const { return (size_t)sy & 0x7fff; }
-
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/primref_mb.h b/thirdparty/embree-aarch64/kernels/common/primref_mb.h
deleted file mode 100644
index b6c1ad5712..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/primref_mb.h
+++ /dev/null
@@ -1,262 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-#define MBLUR_BIN_LBBOX 1
-
-namespace embree
-{
-#if MBLUR_BIN_LBBOX
-
- /*! A primitive reference stores the bounds of the primitive and its ID.
-  *  This variant keeps the linear bounds; geomID, primID and the active/total
-  *  time segment counts are stored in the extra .a/.u lane of the four bounds
-  *  vectors of lbounds. */
- struct PrimRefMB
- {
-   typedef LBBox3fa BBox;
-
-   __forceinline PrimRefMB () {}
-
-   /*! constructs a primref from separate geometry and primitive IDs */
-   __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
-     : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
-   {
-     assert(activeTimeSegments > 0);
-     lbounds.bounds0.lower.a = geomID;
-     lbounds.bounds0.upper.a = primID;
-     lbounds.bounds1.lower.a = activeTimeSegments;
-     lbounds.bounds1.upper.a = totalTimeSegments;
-   }
-
-   /*! constructs a primref from a size_t sized ID. The ID halves are written
-    *  through the unsigned .u lanes, i.e. the lanes ID() reads back and the
-    *  size_t constructor below writes, so no unsigned->signed conversion of
-    *  the ID bits is involved. */
-   __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
-     : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
-   {
-     assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
-     lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
-     lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
-#else
-     lbounds.bounds0.lower.u = id;
-     lbounds.bounds0.upper.u = 0;
-#endif
-     lbounds.bounds1.lower.a = activeTimeSegments;
-     lbounds.bounds1.upper.a = totalTimeSegments;
-   }
-
-   /*! constructs a primref from a size_t sized ID */
-   __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
-     : lbounds((LBBox3fx)lbounds_i), time_range(time_range)
-   {
-     assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
-     lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
-     lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
-#else
-     lbounds.bounds0.lower.u = id;
-     lbounds.bounds0.upper.u = 0;
-#endif
-     lbounds.bounds1.lower.a = activeTimeSegments;
-     lbounds.bounds1.upper.a = totalTimeSegments;
-   }
-
-   /*! returns bounds for binning */
-   __forceinline LBBox3fa bounds() const {
-     return lbounds;
-   }
-
-   /*! returns the number of time segments of this primref */
-   __forceinline unsigned size() const {
-     return lbounds.bounds1.lower.a;
-   }
-
-   /*! returns the total number of time segments passed at construction */
-   __forceinline unsigned totalTimeSegments() const {
-     return lbounds.bounds1.upper.a;
-   }
-
-   /* calculate overlapping time segment range */
-   __forceinline range<int> timeSegmentRange(const BBox1f& range) const {
-     return getTimeSegmentRange(range,time_range,float(totalTimeSegments()));
-   }
-
-   /* returns time that corresponds to time step */
-   __forceinline float timeStep(const int i) const {
-     assert(i>=0 && i<=(int)totalTimeSegments());
-     return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments());
-   }
-
-   /*! checks if time range overlaps; the own range is scaled by 0.9999/1.0001 before comparing */
-   __forceinline bool time_range_overlap(const BBox1f& range) const
-   {
-     if (0.9999f*time_range.upper <= range.lower) return false;
-     if (1.0001f*time_range.lower >= range.upper) return false;
-     return true;
-   }
-
-   /*! returns center for binning */
-   __forceinline Vec3fa binCenter() const {
-     return center2(lbounds.interpolate(0.5f));
-   }
-
-   /*! returns bounds and centroid used for binning */
-   __forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const
-   {
-     bounds_o = bounds();
-     center_o = binCenter();
-   }
-
-   /*! returns the geometry ID */
-   __forceinline unsigned geomID() const {
-     return lbounds.bounds0.lower.a;
-   }
-
-   /*! returns the primitive ID */
-   __forceinline unsigned primID() const {
-     return lbounds.bounds0.upper.a;
-   }
-
-   /*! returns an size_t sized ID */
-   __forceinline size_t ID() const {
-#if defined(__X86_64__) || defined(__aarch64__)
-     return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32);
-#else
-     return size_t(lbounds.bounds0.lower.u);
-#endif
-   }
-
-   /*! special function for operator< */
-   __forceinline uint64_t ID64() const {
-     return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
-   }
-
-   /*! allows sorting the primrefs by ID */
-   friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
-     return p0.ID64() < p1.ID64();
-   }
-
-   /*! Outputs primitive reference to a stream. */
-   friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
-     return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
-   }
-
- public:
-   LBBox3fx lbounds;
-   BBox1f time_range; // entire geometry time range
- };
-
-#else
-
- /*! A primitive reference stores the bounds of the primitive and its ID.
-  *  This variant stores only the bounds interpolated at 0.5 and keeps the
-  *  time segment counts in separate members. */
- struct __aligned(16) PrimRefMB
- {
-   typedef BBox3fa BBox;
-
-   __forceinline PrimRefMB () {}
-
-   /*! constructs a primref from separate geometry and primitive IDs */
-   __forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
-     : bbox(bounds.interpolate(0.5f)),
-       _activeTimeSegments(activeTimeSegments),
-       _totalTimeSegments(totalTimeSegments),
-       time_range(time_range)
-   {
-     assert(activeTimeSegments > 0);
-     bbox.lower.a = geomID;
-     bbox.upper.a = primID;
-   }
-
-   /*! constructs a primref from a size_t sized ID */
-   __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
-     : bbox(bounds.interpolate(0.5f)),
-       _activeTimeSegments(activeTimeSegments),
-       _totalTimeSegments(totalTimeSegments),
-       time_range(time_range)
-   {
-     assert(activeTimeSegments > 0);
-#if defined(__X86_64__) || defined(__aarch64__)
-     bbox.lower.u = id & 0xFFFFFFFF;
-     bbox.upper.u = (id >> 32) & 0xFFFFFFFF;
-#else
-     bbox.lower.u = id;
-     bbox.upper.u = 0;
-#endif
-   }
-
-   /*! bounds used for binning */
-   __forceinline BBox3fa bounds() const { return bbox; }
-
-   /*! number of time segments of this primref */
-   __forceinline unsigned int size() const { return _activeTimeSegments; }
-
-   /*! total number of time segments passed at construction */
-   __forceinline unsigned int totalTimeSegments() const { return _totalTimeSegments; }
-
-   /* calculate overlapping time segment range */
-   __forceinline range<int> timeSegmentRange(const BBox1f& range) const
-   {
-     const float numSegments = float(_totalTimeSegments);
-     return getTimeSegmentRange(range,time_range,numSegments);
-   }
-
-   /* returns time that corresponds to time step */
-   __forceinline float timeStep(const int i) const
-   {
-     assert(i>=0 && i<=(int)_totalTimeSegments);
-     const float scaled = time_range.size()*float(i);
-     return time_range.lower + scaled/float(_totalTimeSegments);
-   }
-
-   /*! checks if time range overlaps; the own range is scaled by 0.9999/1.0001 before comparing */
-   __forceinline bool time_range_overlap(const BBox1f& range) const
-   {
-     const bool endsBefore  = 0.9999f*time_range.upper <= range.lower;
-     const bool startsAfter = 1.0001f*time_range.lower >= range.upper;
-     return !(endsBefore || startsAfter);
-   }
-
-   /*! center used for binning */
-   __forceinline Vec3fa binCenter() const { return center2(bounds()); }
-
-   /*! returns bounds and centroid used for binning */
-   __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
-   {
-     bounds_o = bounds();
-     center_o = binCenter();
-   }
-
-   /*! geometry ID */
-   __forceinline unsigned int geomID() const { return bbox.lower.a; }
-
-   /*! primitive ID */
-   __forceinline unsigned int primID() const { return bbox.upper.a; }
-
-   /*! returns an size_t sized ID */
-   __forceinline size_t ID() const
-   {
-#if defined(__X86_64__) || defined(__aarch64__)
-     return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32);
-#else
-     return size_t(bbox.lower.u);
-#endif
-   }
-
-   /*! 64-bit key used by operator<: primID in the upper half plus geomID */
-   __forceinline uint64_t ID64() const
-   {
-     const uint64_t prim = (uint64_t)primID();
-     const uint64_t geom = (uint64_t)geomID();
-     return (prim << 32) + geom;
-   }
-
-   /*! allows sorting the primrefs by ID */
-   friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1)
-   {
-     const uint64_t key0 = p0.ID64();
-     const uint64_t key1 = p1.ID64();
-     return key0 < key1;
-   }
-
-   /*! Outputs primitive reference to a stream. */
-   friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref)
-   {
-     cout << "{ bounds = " << ref.bounds();
-     cout << ", geomID = " << ref.geomID();
-     cout << ", primID = " << ref.primID();
-     cout << ", active_segments = " << ref.size();
-     cout << ", total_segments = " << ref.totalTimeSegments();
-     return cout << " }";
-   }
-
- public:
-   BBox3fa bbox; // bounds, geomID, primID
-   unsigned int _activeTimeSegments;
-   unsigned int _totalTimeSegments;
-   BBox1f time_range; // entire geometry time range
- };
-
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/profile.h b/thirdparty/embree-aarch64/kernels/common/profile.h
deleted file mode 100644
index a7de36414d..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/profile.h
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! Helper structure for the implementation of the profile functions below.
-  *  Records timings of up to N named sections over repeated begin()/end()
-  *  iterations: "fst" is the timing of the very first iteration, while
-  *  min/avg/max only accumulate once numSkip iterations have completed. */
- struct ProfileTimer
- {
-   static const size_t N = 20;
-
-   /* default construction does nothing (all members stay uninitialized) */
-   ProfileTimer () {}
-
-   ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0), tj(0)
-   {
-     for (size_t k=0; k<N; k++) {
-       names[k] = nullptr;
-       dt_fst[k] = 0.0;
-       dt_min[k] = pos_inf;
-       dt_avg[k] = 0.0;
-       dt_max[k] = neg_inf;
-     }
-   }
-
-   /* starts a new iteration */
-   __forceinline void begin()
-   {
-     j=0;
-     t0 = tj = getSeconds();
-   }
-
-   /* finishes the iteration by recording the "total" section */
-   __forceinline void end() {
-     absolute("total");
-     i++;
-   }
-
-   /* records the time elapsed since the previous section (or since begin()) */
-   __forceinline void operator() (const char* name) {
-     relative(name);
-   }
-
-   /* records the time elapsed since begin() */
-   __forceinline void absolute (const char* name) {
-     record(name,getSeconds()-t0);
-   }
-
-   /* records the time elapsed since the previously recorded relative section */
-   __forceinline void relative (const char* name)
-   {
-     const double t1 = getSeconds();
-     const double dt = t1-tj;
-     tj = t1;
-     record(name,dt);
-   }
-
-   /* Prints throughput and timing tables. The accumulated sums are left
-    * untouched, so print() can be called repeatedly and avg() stays valid
-    * afterwards. */
-   void print(size_t numElements)
-   {
-     printf(" profile [M/s]:\n");
-     for (size_t k=0; k<maxJ; k++)
-       printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
-              names[k],numElements/dt_fst[k]*1E-6,numElements/dt_max[k]*1E-6,numElements/average(k)*1E-6,numElements/dt_min[k]*1E-6);
-
-     printf(" profile [ms]:\n");
-     for (size_t k=0; k<maxJ; k++)
-       printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
-              names[k],1000.0*dt_fst[k],1000.0*dt_min[k],1000.0*average(k),1000.0*dt_max[k]);
-   }
-
-   /* Prints the timing table only; does not modify the accumulated sums. */
-   void print()
-   {
-     printf(" profile:\n");
-
-     for (size_t k=0; k<maxJ; k++) {
-       printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
-              names[k],1000.0*dt_fst[k],1000.0*dt_min[k],1000.0*average(k),1000.0*dt_max[k]);
-     }
-   }
-
-   /* average of the last recorded section (0 if nothing was recorded) */
-   double avg() {
-     return maxJ ? average(maxJ-1) : 0.0;
-   }
-
- private:
-
-   /* stores timing dt for section j and advances to the next section */
-   __forceinline void record (const char* name, const double dt)
-   {
-     assert(j < N);
-     if (j >= N) return; // never write past the N-entry tables
-     assert(names[j] == nullptr || names[j] == name);
-     names[j] = name;
-     if (i == 0) dt_fst[j] = dt;
-     if (i>=numSkip) {
-       dt_min[j] = min(dt_min[j],dt);
-       dt_avg[j] = dt_avg[j] + dt;
-       dt_max[j] = max(dt_max[j],dt);
-     }
-     j++;
-     maxJ = max(maxJ,j);
-   }
-
-   /* number of finished iterations that contributed to min/avg/max;
-    * guarded because i-numSkip would wrap around for i < numSkip */
-   __forceinline size_t numMeasured() const {
-     return i > numSkip ? i-numSkip : 0;
-   }
-
-   /* average timing of section k, or 0 when no iteration was measured yet */
-   __forceinline double average (const size_t k) const
-   {
-     const size_t n = numMeasured();
-     return n ? dt_avg[k]/double(n) : 0.0;
-   }
-
- private:
-   size_t i;       // index of the current iteration
-   size_t j;       // index of the next section within the current iteration
-   size_t maxJ;    // largest number of sections recorded in one iteration
-   size_t numSkip;
-   double t0;      // time of begin()
-   double tj;      // time of the last relative section
-   const char* names[N];
-   double dt_fst[N];
-   double dt_min[N];
-   double dt_avg[N]; // accumulated sum; divided on demand by average()
-   double dt_max[N];
- };
-
- /*! Runs a code block numSkip+numIter times, timing named sections of it,
-     and prints the collected statistics afterwards. Usage:
-
-     profile(1,10,1000,[&](ProfileTimer& timer) {
-       // code
-       timer("A");
-       // code
-       timer("B");
-     });
- */
- template<typename Closure>
- void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
- {
-   ProfileTimer timer(numSkip);
-
-   const size_t numTotal = numSkip+numIter;
-   for (size_t iter=0; iter<numTotal; iter++) {
-     timer.begin();
-     closure(timer);
-     timer.end();
-   }
-
-   timer.print(numElements);
- }
-
- /*! Same as the profile function taking no timer, except that the caller
-     provides the timer object; it is reset to ProfileTimer(numSkip) first. */
- template<typename Closure>
- void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
- {
-   timer = ProfileTimer(numSkip);
-
-   const size_t numTotal = numSkip+numIter;
-   for (size_t iter=0; iter<numTotal; iter++) {
-     timer.begin();
-     closure(timer);
-     timer.end();
-   }
-
-   timer.print(numElements);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/ray.h b/thirdparty/embree-aarch64/kernels/common/ray.h
deleted file mode 100644
index 336d48942c..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/ray.h
+++ /dev/null
@@ -1,1517 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "instance_stack.h"
-
-// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
-
-namespace embree
-{
- static const size_t MAX_INTERNAL_STREAM_SIZE = 32;
-
- /* Packet of K rays; every member holds one value per ray */
- template<int K>
- struct RayK
- {
-   /* Default construction does nothing */
-   __forceinline RayK() {}
-
-   /* Constructs a ray from origin, direction, and ray segment. Near
-    * has to be smaller than far */
-   __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
-                      const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
-                      const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
-     : org(org), _tnear(tnear), dir(dir), _time(time), tfar(tfar), mask(mask), id(id), flags(flags) {}
-
-   /* Returns the size of the ray */
-   static __forceinline size_t size() { return K; }
-
-   /* Calculates if this is a valid ray that does not cause issues during traversal */
-   __forceinline vbool<K> valid() const
-   {
-     const vfloat<K> large(FLT_LARGE);
-     const vfloat<K> infinity(inf);
-     const vbool<K> okX    = (abs(org.x) <= large) & (abs(dir.x) <= large);
-     const vbool<K> okY    = (abs(org.y) <= large) & (abs(dir.y) <= large);
-     const vbool<K> okZ    = (abs(org.z) <= large) & (abs(dir.z) <= large);
-     const vbool<K> okNear = abs(tnear()) <= infinity;
-     const vbool<K> okFar  = abs(tfar) <= infinity;
-     return okX & okY & okZ & okNear & okFar;
-   }
-
-   __forceinline void get(RayK<1>* ray) const;
-   __forceinline void get(size_t i, RayK<1>& ray) const;
-   __forceinline void set(const RayK<1>* ray);
-   __forceinline void set(size_t i, const RayK<1>& ray);
-
-   __forceinline void copy(size_t dest, size_t source);
-
-   /* Returns per ray a 3-bit code: bit 0/1/2 is set when dir.x/y/z is negative */
-   __forceinline vint<K> octant() const
-   {
-     const vint<K> none(zero);
-     const vint<K> bitX = select(dir.x < 0.0f, vint<K>(1), none);
-     const vint<K> bitY = select(dir.y < 0.0f, vint<K>(2), none);
-     const vint<K> bitZ = select(dir.z < 0.0f, vint<K>(4), none);
-     return bitX | bitY | bitZ;
-   }
-
-   /* Ray data */
-   Vec3vf<K> org;    // ray origin
-   vfloat<K> _tnear; // start of ray segment
-   Vec3vf<K> dir;    // ray direction
-   vfloat<K> _time;  // time of this ray for motion blur
-   vfloat<K> tfar;   // end of ray segment
-   vint<K> mask;     // used to mask out objects during traversal
-   vint<K> id;
-   vint<K> flags;
-
-   /* accessors for the members that are stored with a leading underscore */
-   __forceinline vfloat<K>& tnear() { return _tnear; }
-   __forceinline vfloat<K>& time() { return _time; }
-   __forceinline const vfloat<K>& tnear() const { return _tnear; }
-   __forceinline const vfloat<K>& time() const { return _time; }
- };
-
- /* Ray+hit structure for K rays: the ray packet plus per-ray hit data */
- template<int K>
- struct RayHitK : RayK<K>
- {
-   using RayK<K>::org;
-   using RayK<K>::_tnear;
-   using RayK<K>::dir;
-   using RayK<K>::_time;
-   using RayK<K>::tfar;
-   using RayK<K>::mask;
-   using RayK<K>::id;
-   using RayK<K>::flags;
-
-   using RayK<K>::tnear;
-   using RayK<K>::time;
-
-   /* Default construction does nothing */
-   __forceinline RayHitK() {}
-
-   /* Constructs a ray from origin, direction, and ray segment. Near
-    * has to be smaller than far. geomID and all instID levels are set
-    * to RTC_INVALID_GEOMETRY_ID. */
-   __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
-                         const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
-                         const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
-     : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
-       geomID(RTC_INVALID_GEOMETRY_ID)
-   {
-     for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
-       instID[l] = RTC_INVALID_GEOMETRY_ID;
-   }
-
-   /* Constructs from a ray packet; geomID and instID are invalidated */
-   __forceinline RayHitK(const RayK<K>& ray)
-     : RayK<K>(ray),
-       geomID(RTC_INVALID_GEOMETRY_ID)
-   {
-     for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
-       instID[l] = RTC_INVALID_GEOMETRY_ID;
-   }
-
-   /* Assigns the ray part and invalidates geomID and instID */
-   __forceinline RayHitK<K>& operator =(const RayK<K>& ray)
-   {
-     org    = ray.org;
-     _tnear = ray._tnear;
-     dir    = ray.dir;
-     _time  = ray._time;
-     tfar   = ray.tfar;
-     mask   = ray.mask;
-     id     = ray.id;
-     flags  = ray.flags;
-
-     geomID = RTC_INVALID_GEOMETRY_ID;
-     for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
-       instID[l] = RTC_INVALID_GEOMETRY_ID;
-
-     return *this;
-   }
-
-   /* Calculates if the hit is valid: for every ray that is active in valid0
-    * and has a valid geomID, t, u, v and Ng are range checked and the first
-    * violation is reported through throw_RTCError */
-   __forceinline void verifyHit(const vbool<K>& valid0) const
-   {
-     const vbool<K> valid = valid0 & (geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID));
-     const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
-     const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
-     const vbool<K> vv = (abs(v) <= vfloat<K>(FLT_LARGE)); // has to test v, not u a second time
-     const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
-     const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
-     const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
-     if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
-     if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
-     if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
-     if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
-     if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
-     if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
-   }
-
-   __forceinline void get(RayHitK<1>* ray) const;
-   __forceinline void get(size_t i, RayHitK<1>& ray) const;
-   __forceinline void set(const RayHitK<1>* ray);
-   __forceinline void set(size_t i, const RayHitK<1>& ray);
-
-   __forceinline void copy(size_t dest, size_t source);
-
-   /* Hit data */
-   Vec3vf<K> Ng;    // geometry normal
-   vfloat<K> u;     // barycentric u coordinate of hit
-   vfloat<K> v;     // barycentric v coordinate of hit
-   vuint<K> primID; // primitive ID
-   vuint<K> geomID; // geometry ID
-   vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Single ray specialization: tnear and time live in the w components of org and dir */
- template<>
- struct RayK<1>
- {
-   /* Default construction does nothing */
-   __forceinline RayK() {}
-
-   /* Constructs a ray from origin, direction, and ray segment. Near
-    * has to be smaller than far */
-   __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
-     : org(org,tnear),
-       dir(dir,time),
-       tfar(tfar),
-       mask(mask),
-       id(id),
-       flags(flags) {}
-
-   /* Calculates if this is a valid ray that does not cause issues during traversal */
-   __forceinline bool valid() const
-   {
-     const Vec3fa large(FLT_LARGE);
-     if (!all(le_mask(abs(Vec3fa(org)), large) & le_mask(abs(Vec3fa(dir)), large))) return false;
-     if (!(abs(tnear()) <= float(inf))) return false;
-     return abs(tfar) <= float(inf);
-   }
-
-   /* Ray data */
-   Vec3ff org; // 3 floats for ray origin, 1 float for tnear
-   //float tnear; // start of ray segment
-   Vec3ff dir; // 3 floats for ray direction, 1 float for time
-   // float time;
-   float tfar; // end of ray segment
-   int mask;   // used to mask out objects during traversal
-   int id;     // ray ID
-   int flags;  // ray flags
-
-   /* start of ray segment, stored in org.w */
-   __forceinline float& tnear() { return org.w; }
-   __forceinline const float& tnear() const { return org.w; }
-
-   /* time of the ray, stored in dir.w */
-   __forceinline float& time() { return dir.w; }
-   __forceinline const float& time() const { return dir.w; }
- };
-
- /* Ray+hit structure for a single ray */
- template<>
- struct RayHitK<1> : RayK<1>
- {
-   /* Default construction does nothing */
-   __forceinline RayHitK() {}
-
-   /* Constructs a ray from origin, direction, and ray segment. Near
-    * has to be smaller than far. Only geomID of the hit data is
-    * initialized (to RTC_INVALID_GEOMETRY_ID). */
-   __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
-     : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
-       geomID(RTC_INVALID_GEOMETRY_ID) {}
-
-   /* Constructs from a ray; geomID is invalidated */
-   __forceinline RayHitK(const RayK<1>& ray)
-     : RayK<1>(ray),
-       geomID(RTC_INVALID_GEOMETRY_ID) {}
-
-   /* Assigns the ray part and invalidates geomID */
-   __forceinline RayHitK<1>& operator =(const RayK<1>& ray)
-   {
-     org   = ray.org;
-     dir   = ray.dir;
-     tfar  = ray.tfar;
-     mask  = ray.mask;
-     id    = ray.id;
-     flags = ray.flags;
-
-     geomID = RTC_INVALID_GEOMETRY_ID;
-
-     return *this;
-   }
-
-   /* Calculates if the hit is valid: does nothing when geomID is invalid,
-    * otherwise range checks t, u, v and Ng and reports the first violation
-    * through throw_RTCError */
-   __forceinline void verifyHit() const
-   {
-     if (geomID == RTC_INVALID_GEOMETRY_ID) return;
-     const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
-     const bool vu = (abs(u) <= FLT_LARGE);
-     const bool vv = (abs(v) <= FLT_LARGE); // has to test v, not u a second time
-     const bool vnx = abs(Ng.x) <= FLT_LARGE;
-     const bool vny = abs(Ng.y) <= FLT_LARGE;
-     const bool vnz = abs(Ng.z) <= FLT_LARGE;
-     if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
-     if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
-     if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
-     if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
-     if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
-     if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
-   }
-
-   /* Hit data */
-   Vec3f Ng;            // not normalized geometry normal
-   float u;             // barycentric u coordinate of hit
-   float v;             // barycentric v coordinate of hit
-   unsigned int primID; // primitive ID
-   unsigned int geomID; // geometry ID
-   unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
- };
-
- /* Converts a ray packet into K single rays written to ray[0..K-1] */
- template<int K>
- __forceinline void RayK<K>::get(RayK<1>* ray) const
- {
-   // FIXME: use SIMD transpose
-   for (size_t lane = 0; lane < K; lane++)
-     get(lane, ray[lane]);
- }
-
- /* Converts a ray+hit packet into K single ray+hits written to ray[0..K-1] */
- template<int K>
- __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
- {
-   // FIXME: use SIMD transpose
-   size_t lane = 0;
-   while (lane < K) {
-     get(lane, ray[lane]);
-     lane++;
-   }
- }
-
- /* Extracts element i of a ray packet into a single ray */
- template<int K>
- __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
- {
-   ray.org.x   = org.x[i];
-   ray.org.y   = org.y[i];
-   ray.org.z   = org.z[i];
-   ray.tnear() = tnear()[i];
-
-   ray.dir.x  = dir.x[i];
-   ray.dir.y  = dir.y[i];
-   ray.dir.z  = dir.z[i];
-   ray.time() = time()[i];
-
-   ray.tfar  = tfar[i];
-   ray.mask  = mask[i];
-   ray.id    = id[i];
-   ray.flags = flags[i];
- }
-
- /* Extracts element i of a ray+hit packet into a single ray+hit */
- template<int K>
- __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
- {
-   /* ray part */
-   ray.org.x   = org.x[i];
-   ray.org.y   = org.y[i];
-   ray.org.z   = org.z[i];
-   ray.tnear() = tnear()[i];
-   ray.dir.x   = dir.x[i];
-   ray.dir.y   = dir.y[i];
-   ray.dir.z   = dir.z[i];
-   ray.tfar    = tfar[i];
-   ray.time()  = time()[i];
-   ray.mask    = mask[i];
-   ray.id      = id[i];
-   ray.flags   = flags[i];
-
-   /* hit part */
-   ray.Ng.x   = Ng.x[i];
-   ray.Ng.y   = Ng.y[i];
-   ray.Ng.z   = Ng.z[i];
-   ray.u      = u[i];
-   ray.v      = v[i];
-   ray.primID = primID[i];
-   ray.geomID = geomID[i];
-
-   instance_id_stack::copy(instID, ray.instID, i);
- }
-
- /* Fills the ray packet from the K single rays ray[0..K-1] */
- template<int K>
- __forceinline void RayK<K>::set(const RayK<1>* ray)
- {
-   // FIXME: use SIMD transpose
-   size_t lane = 0;
-   while (lane < K) {
-     set(lane, ray[lane]);
-     lane++;
-   }
- }
-
- /* Fills the ray+hit packet from the K single ray+hits ray[0..K-1] */
- template<int K>
- __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
- {
-   // FIXME: use SIMD transpose
-   size_t lane = 0;
-   while (lane < K) {
-     set(lane, ray[lane]);
-     lane++;
-   }
- }
-
- /* Inserts a single ray into element i of a ray packet */
- template<int K>
- __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
- {
-   org.x[i]   = ray.org.x;
-   org.y[i]   = ray.org.y;
-   org.z[i]   = ray.org.z;
-   tnear()[i] = ray.tnear();
-
-   dir.x[i]  = ray.dir.x;
-   dir.y[i]  = ray.dir.y;
-   dir.z[i]  = ray.dir.z;
-   time()[i] = ray.time();
-
-   tfar[i]  = ray.tfar;
-   mask[i]  = ray.mask;
-   id[i]    = ray.id;
-   flags[i] = ray.flags;
- }
-
- /* Inserts a single ray+hit into element i of a ray+hit packet */
- template<int K>
- __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
- {
-   /* ray part */
-   org.x[i]   = ray.org.x;
-   org.y[i]   = ray.org.y;
-   org.z[i]   = ray.org.z;
-   tnear()[i] = ray.tnear();
-   dir.x[i]   = ray.dir.x;
-   dir.y[i]   = ray.dir.y;
-   dir.z[i]   = ray.dir.z;
-   time()[i]  = ray.time();
-   tfar[i]    = ray.tfar;
-   mask[i]    = ray.mask;
-   id[i]      = ray.id;
-   flags[i]   = ray.flags;
-
-   /* hit part */
-   Ng.x[i]   = ray.Ng.x;
-   Ng.y[i]   = ray.Ng.y;
-   Ng.z[i]   = ray.Ng.z;
-   u[i]      = ray.u;
-   v[i]      = ray.v;
-   primID[i] = ray.primID;
-   geomID[i] = ray.geomID;
-
-   instance_id_stack::copy(ray.instID, instID, i);
- }
-
- /* Copies element source of the ray packet into element dest of the same packet */
- template<int K>
- __forceinline void RayK<K>::copy(size_t dest, size_t source)
- {
-   org.x[dest]   = org.x[source];
-   org.y[dest]   = org.y[source];
-   org.z[dest]   = org.z[source];
-   tnear()[dest] = tnear()[source];
-
-   dir.x[dest]  = dir.x[source];
-   dir.y[dest]  = dir.y[source];
-   dir.z[dest]  = dir.z[source];
-   time()[dest] = time()[source];
-
-   tfar[dest]  = tfar[source];
-   mask[dest]  = mask[source];
-   id[dest]    = id[source];
-   flags[dest] = flags[source];
- }
-
- /* Copies element source of the ray+hit packet into element dest of the same packet */
- template<int K>
- __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
- {
-   /* ray part */
-   org.x[dest]   = org.x[source];
-   org.y[dest]   = org.y[source];
-   org.z[dest]   = org.z[source];
-   tnear()[dest] = tnear()[source];
-   dir.x[dest]   = dir.x[source];
-   dir.y[dest]   = dir.y[source];
-   dir.z[dest]   = dir.z[source];
-   time()[dest]  = time()[source];
-   tfar[dest]    = tfar[source];
-   mask[dest]    = mask[source];
-   id[dest]      = id[source];
-   flags[dest]   = flags[source];
-
-   /* hit part */
-   Ng.x[dest]   = Ng.x[source];
-   Ng.y[dest]   = Ng.y[source];
-   Ng.z[dest]   = Ng.z[source];
-   u[dest]      = u[source];
-   v[dest]      = v[source];
-   primID[dest] = primID[source];
-   geomID[dest] = geomID[source];
-
-   instance_id_stack::copy(instID, instID, source, dest);
- }
-
- /* Short names for the ray types of width 1, 4, 8 and 16 */
- using Ray   = RayK<1>;
- using Ray4  = RayK<4>;
- using Ray8  = RayK<8>;
- using Ray16 = RayK<16>;
- struct RayN;
-
- /* Short names for the ray+hit types of width 1, 4, 8 and 16 */
- using RayHit   = RayHitK<1>;
- using RayHit4  = RayHitK<4>;
- using RayHit8  = RayHitK<8>;
- using RayHit16 = RayHitK<16>;
- struct RayHitN;
-
- /* Selects RayHitK<K> when intersect is true and RayK<K> otherwise */
- template<int K, bool intersect>
- struct RayTypeHelper;
-
- template<int K>
- struct RayTypeHelper<K, true>
- {
-   using Ty = RayHitK<K>;
- };
-
- template<int K>
- struct RayTypeHelper<K, false>
- {
-   using Ty = RayK<K>;
- };
-
- /* single ray variant of the selection */
- template<bool intersect>
- using RayType = typename RayTypeHelper<1, intersect>::Ty;
-
- /* packet variant of the selection */
- template<int K, bool intersect>
- using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
-
- /* Writes all ray members to a stream, one member per line */
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
- {
-   cout << "{ " << embree_endl;
-   cout << " org = " << ray.org << embree_endl;
-   cout << " dir = " << ray.dir << embree_endl;
-   cout << " near = " << ray.tnear() << embree_endl;
-   cout << " far = " << ray.tfar << embree_endl;
-   cout << " time = " << ray.time() << embree_endl;
-   cout << " mask = " << ray.mask << embree_endl;
-   cout << " id = " << ray.id << embree_endl;
-   cout << " flags = " << ray.flags << embree_endl;
-   return cout << "}";
- }
-
- /* Writes all ray and hit members to a stream, one member per line; the
-  * instID levels are printed space separated on a single line */
- template<int K>
- __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
- {
-   cout << "{ " << embree_endl
-        << " org = " << ray.org << embree_endl
-        << " dir = " << ray.dir << embree_endl
-        << " near = " << ray.tnear() << embree_endl
-        << " far = " << ray.tfar << embree_endl
-        << " time = " << ray.time() << embree_endl
-        << " mask = " << ray.mask << embree_endl
-        << " id = " << ray.id << embree_endl
-        << " flags = " << ray.flags << embree_endl
-        << " Ng = " << ray.Ng << embree_endl // line break was missing, Ng and u ended up on one line
-        << " u = " << ray.u << embree_endl
-        << " v = " << ray.v << embree_endl
-        << " primID = " << ray.primID << embree_endl
-        << " geomID = " << ray.geomID << embree_endl
-        << " instID =";
-   for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
-   {
-     cout << " " << ray.instID[l];
-   }
-   cout << embree_endl;
-   return cout << "}";
- }
-
- struct RayStreamSOA
- {
- /* Wraps the memory at rays, which is accessed as N-element component arrays */
- __forceinline RayStreamSOA(void* rays, size_t N)
-   : ptr((char*)rays),
-     N(N)
- {}
-
- /* ray data access functions: component c starts c*4*N bytes after ptr, offset is added in bytes */
- __forceinline float* org_x(size_t offset = 0) { return (float*)(ptr + 0*4*N + offset); } // x coordinate of ray origin
- __forceinline float* org_y(size_t offset = 0) { return (float*)(ptr + 1*4*N + offset); } // y coordinate of ray origin
- __forceinline float* org_z(size_t offset = 0) { return (float*)(ptr + 2*4*N + offset); } // z coordinate of ray origin
- __forceinline float* tnear(size_t offset = 0) { return (float*)(ptr + 3*4*N + offset); } // start of ray segment
-
- __forceinline float* dir_x(size_t offset = 0) { return (float*)(ptr + 4*4*N + offset); } // x coordinate of ray direction
- __forceinline float* dir_y(size_t offset = 0) { return (float*)(ptr + 5*4*N + offset); } // y coordinate of ray direction
- __forceinline float* dir_z(size_t offset = 0) { return (float*)(ptr + 6*4*N + offset); } // z coordinate of ray direction
- __forceinline float* time (size_t offset = 0) { return (float*)(ptr + 7*4*N + offset); } // time of this ray for motion blur
-
- __forceinline float* tfar (size_t offset = 0) { return (float*)(ptr + 8*4*N + offset); } // end of ray segment (set to hit distance)
- __forceinline int* mask (size_t offset = 0) { return (int*)(ptr + 9*4*N + offset); }   // used to mask out objects during traversal (optional)
- __forceinline int* id   (size_t offset = 0) { return (int*)(ptr + 10*4*N + offset); }  // id
- __forceinline int* flags(size_t offset = 0) { return (int*)(ptr + 11*4*N + offset); }  // flags
-
- /* hit data access functions */
- __forceinline float* Ng_x(size_t offset = 0) { return (float*)(ptr + 12*4*N + offset); } // x coordinate of geometry normal
- __forceinline float* Ng_y(size_t offset = 0) { return (float*)(ptr + 13*4*N + offset); } // y coordinate of geometry normal
- __forceinline float* Ng_z(size_t offset = 0) { return (float*)(ptr + 14*4*N + offset); } // z coordinate of geometry normal
-
- __forceinline float* u(size_t offset = 0) { return (float*)(ptr + 15*4*N + offset); } // barycentric u coordinate of hit
- __forceinline float* v(size_t offset = 0) { return (float*)(ptr + 16*4*N + offset); } // barycentric v coordinate of hit
-
- __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)(ptr + 17*4*N + offset); } // primitive ID
- __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)(ptr + 18*4*N + offset); } // geometry ID
- __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)(ptr + 19*4*N + level*4*N + offset); } // instance ID
-
- /* Reads the single ray whose components start at byte offset in each component array */
- __forceinline Ray getRayByOffset(size_t offset)
- {
-   Ray ray;
-
-   ray.org.x   = *org_x(offset);
-   ray.org.y   = *org_y(offset);
-   ray.org.z   = *org_z(offset);
-   ray.tnear() = *tnear(offset);
-
-   ray.dir.x  = *dir_x(offset);
-   ray.dir.y  = *dir_y(offset);
-   ray.dir.z  = *dir_z(offset);
-   ray.time() = *time(offset);
-
-   ray.tfar  = *tfar(offset);
-   ray.mask  = *mask(offset);
-   ray.id    = *id(offset);
-   ray.flags = *flags(offset);
-
-   return ray;
- }
-
- /* Reads a packet of K rays with unaligned, unmasked loads starting at byte
-  * offset in each component array. tnear and time are member functions of
-  * RayK<K>, so they are written through their accessors. */
- template<int K>
- __forceinline RayK<K> getRayByOffset(size_t offset)
- {
-   RayK<K> ray;
-   ray.org.x   = vfloat<K>::loadu(org_x(offset));
-   ray.org.y   = vfloat<K>::loadu(org_y(offset));
-   ray.org.z   = vfloat<K>::loadu(org_z(offset));
-   ray.tnear() = vfloat<K>::loadu(tnear(offset));
-   ray.dir.x   = vfloat<K>::loadu(dir_x(offset));
-   ray.dir.y   = vfloat<K>::loadu(dir_y(offset));
-   ray.dir.z   = vfloat<K>::loadu(dir_z(offset));
-   ray.time()  = vfloat<K>::loadu(time(offset));
-   ray.tfar    = vfloat<K>::loadu(tfar(offset));
-   ray.mask    = vint<K>::loadu(mask(offset));
-   ray.id      = vint<K>::loadu(id(offset));
-   ray.flags   = vint<K>::loadu(flags(offset));
-   return ray;
- }
-
- /* Reads a packet of K rays starting at byte offset in each component array,
-  * loading only the elements that are active in valid */
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
- {
-   RayK<K> ray;
-
-   /* float members use masked vector loads */
-   ray.org.x   = vfloat<K>::loadu(valid, org_x(offset));
-   ray.org.y   = vfloat<K>::loadu(valid, org_y(offset));
-   ray.org.z   = vfloat<K>::loadu(valid, org_z(offset));
-   ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
-   ray.dir.x   = vfloat<K>::loadu(valid, dir_x(offset));
-   ray.dir.y   = vfloat<K>::loadu(valid, dir_y(offset));
-   ray.dir.z   = vfloat<K>::loadu(valid, dir_z(offset));
-   ray.time()  = vfloat<K>::loadu(valid, time(offset));
-   ray.tfar    = vfloat<K>::loadu(valid, tfar(offset));
-
-#if !defined(__AVX__)
-   /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
-      because the SSE masked loads always access the entire vector */
-   if (unlikely(!all(valid)))
-   {
-     ray.mask  = zero;
-     ray.id    = zero;
-     ray.flags = zero;
-
-     for (size_t k = 0; k < K; k++)
-     {
-       /* inactive elements keep the zero written above */
-       if (unlikely(!valid[k])) continue;
-
-       ray.mask[k]  = mask(offset)[k];
-       ray.id[k]    = id(offset)[k];
-       ray.flags[k] = flags(offset)[k];
-     }
-   }
-   else
-#endif
-   {
-     ray.mask  = vint<K>::loadu(valid, mask(offset));
-     ray.id    = vint<K>::loadu(valid, id(offset));
-     ray.flags = vint<K>::loadu(valid, flags(offset));
-   }
-
-   return ray;
- }
-
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray) // writes the hit data of K rays back to the stream at byte offset `offset`
- {
- /*
- * valid_i: stores which of the input rays exist (do not access nonexistent rays!)
- * valid: stores which of the rays actually hit something.
- */
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
- /* nothing is written when no lane passes the geomID test */
- if (likely(any(valid)))
- {
- vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
- vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
- vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
- vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
- vfloat<K>::storeu(valid, u(offset), ray.u);
- vfloat<K>::storeu(valid, v(offset), ray.v);
- /* the integer fields primID, geomID and the instID levels follow */
-#if !defined(__AVX__)
- /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
- because the SSE masked stores always access the entire vector */
- if (unlikely(!all(valid_i))) // tests valid_i (rays that exist), not valid (rays that hit)
- {
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- primID(offset)[k] = ray.primID[k];
- geomID(offset)[k] = ray.geomID[k];
- /* instID level 0 is always written; level l only while level l-1 holds a valid ID */
- instID(0, offset)[k] = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- instID(l, offset)[k] = ray.instID[l][k];
-#endif
- }
- }
- }
- else // every input lane exists: use the masked vector stores below
-#endif
- {
- vuint<K>::storeu(valid, primID(offset), ray.primID);
- vuint<K>::storeu(valid, geomID(offset), ray.geomID);
- /* instID level 0 is always stored; level l only while some stored lane has a valid ID at level l-1 */
- vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
-#endif
- }
- }
- }
-
- /*! Writes tfar back to the stream at byte offset `offset` for those lanes of
-     `valid_i` whose tfar is negative; all other lanes are left untouched. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
- {
-   const vbool<K> store_mask = valid_i & (ray.tfar < 0.0f);
-
-   /* early out when no lane qualifies */
-   if (unlikely(!any(store_mask)))
-     return;
-
-   vfloat<K>::storeu(store_mask, tfar(offset), ray.tfar);
- }
-
- /*! Computes a 3-bit octant index from the signs of the direction components of
-     the ray at byte offset `offset`: bit 0 is set for a negative x, bit 1 for a
-     negative y and bit 2 for a negative z component. */
- __forceinline size_t getOctantByOffset(size_t offset)
- {
-   size_t octant = 0;
-   if (*dir_x(offset) < 0.0f) octant |= 1;
-   if (*dir_y(offset) < 0.0f) octant |= 2;
-   if (*dir_z(offset) < 0.0f) octant |= 4;
-   return octant;
- }
-
- /*! Returns true when the ray at byte offset `offset` satisfies tnear <= tfar. */
- __forceinline bool isValidByOffset(size_t offset)
- {
-   return *tnear(offset) <= *tfar(offset);
- }
-
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset) // builds a K-wide ray packet from per-lane offsets into the stream, masked by `valid`
- {
- RayK<K> ray;
- /* AVX2 path uses gathers; the template argument 1 is presumably the byte scale applied to `offset` -- TODO confirm */
-#if defined(__AVX2__)
- ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
- ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
- ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
- ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
- ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
- ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
- ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
- ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
- ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
- ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
- ray.id = vint<K>::template gather<1>(valid, id(), offset);
- ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
-#else
- ray.org = zero; // lanes that are not valid keep these zero values
- ray.tnear() = zero;
- ray.dir = zero;
- ray.time() = zero;
- ray.tfar = zero;
- ray.mask = zero;
- ray.id = zero;
- ray.flags = zero;
- /* scalar fallback: fetch each valid lane individually */
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- const size_t ofs = offset[k];
- /* ofs is passed to the field accessors as the byte offset of lane k's ray */
- ray.org.x[k] = *org_x(ofs);
- ray.org.y[k] = *org_y(ofs);
- ray.org.z[k] = *org_z(ofs);
- ray.tnear()[k] = *tnear(ofs);
- ray.dir.x[k] = *dir_x(ofs);
- ray.dir.y[k] = *dir_y(ofs);
- ray.dir.z[k] = *dir_z(ofs);
- ray.time()[k] = *time(ofs);
- ray.tfar[k] = *tfar(ofs);
- ray.mask[k] = *mask(ofs);
- ray.id[k] = *id(ofs);
- ray.flags[k] = *flags(ofs);
- }
- }
-#endif
-
- return ray;
- }
-
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray) // writes hit data of each lane to that lane's offset in the stream
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); // keep only lanes whose geomID is not the invalid marker
-
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
- vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
- vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
- vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
- vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
- vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
- vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
- vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
- /* instID level 0 is always stored; level l only while some stored lane has a valid ID at level l-1 */
- vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
-#endif
-#else
- size_t valid_bits = movemask(valid);
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits); // lane to process; the loop relies on bscf updating valid_bits (presumably it clears the bit it returns -- TODO confirm)
- const size_t ofs = offset[k];
- /* scalar stores for lane k */
- *tfar(ofs) = ray.tfar[k];
-
- *Ng_x(ofs) = ray.Ng.x[k];
- *Ng_y(ofs) = ray.Ng.y[k];
- *Ng_z(ofs) = ray.Ng.z[k];
- *u(ofs) = ray.u[k];
- *v(ofs) = ray.v[k];
- *primID(ofs) = ray.primID[k];
- *geomID(ofs) = ray.geomID[k];
- /* instID level 0 is always written; level l only while level l-1 holds a valid ID */
- *instID(0, ofs) = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- *instID(l, ofs) = ray.instID[l][k];
-#endif
- }
-#endif
- }
- }
-
- /*! Writes tfar of each lane back to that lane's offset in the stream, but only
-     for lanes of `valid_i` whose tfar is negative. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
- {
-   const vbool<K> store_mask = valid_i & (ray.tfar < 0.0f);
-
-   /* early out when no lane qualifies */
-   if (unlikely(!any(store_mask)))
-     return;
-
-#if defined(__AVX512F__)
-   vfloat<K>::template scatter<1>(store_mask, tfar(), offset, ray.tfar);
-#else
-   /* visit the selected lanes one at a time */
-   for (size_t pending = movemask(store_mask); pending != 0; )
-   {
-     const size_t lane = bscf(pending);
-     const size_t lane_offset = offset[lane];
-     *tfar(lane_offset) = ray.tfar[lane];
-   }
-#endif
- }
-
- char* __restrict__ ptr; // base address of the stream; every field accessor indexes it with a byte offset
- size_t N; // factor in the field offsets: consecutive fields lie 4*N bytes apart
- };
-
- /*! RayStreamSOA whose backing storage is the member array `data`.
-     The array is sized in whole RayHit4 packets, so MAX_K is rounded up to the
-     next multiple of 4: plain integer division would under-allocate (or produce a
-     zero-length array) whenever MAX_K is not a multiple of 4, although the
-     constructor accepts any K <= MAX_K. For multiples of 4 the size is unchanged. */
- template<size_t MAX_K>
- struct StackRayStreamSOA : public RayStreamSOA
- {
-   /*! Binds the base stream to `data` with K as its ray count parameter. */
-   __forceinline StackRayStreamSOA(size_t K)
-     : RayStreamSOA(data, K) { assert(K <= MAX_K); }
-
-   char data[(MAX_K + 3) / 4 * sizeof(RayHit4)];
- };
-
-
- struct RayStreamSOP // ray stream described by one pointer per ray/hit field; rays are addressed by byte offsets added to those pointers
- {
- template<class T>
- __forceinline void init(T& t) // points every field pointer at the corresponding member of t
- {
- org_x = (float*)&t.org.x;
- org_y = (float*)&t.org.y;
- org_z = (float*)&t.org.z;
- tnear = (float*)&t.tnear;
- dir_x = (float*)&t.dir.x;
- dir_y = (float*)&t.dir.y;
- dir_z = (float*)&t.dir.z;
- time = (float*)&t.time;
- tfar = (float*)&t.tfar;
- mask = (unsigned int*)&t.mask;
- id = (unsigned int*)&t.id;
- flags = (unsigned int*)&t.flags;
- /* hit fields */
- Ng_x = (float*)&t.Ng.x;
- Ng_y = (float*)&t.Ng.y;
- Ng_z = (float*)&t.Ng.z;
- u = (float*)&t.u;
- v = (float*)&t.v;
- primID = (unsigned int*)&t.primID;
- geomID = (unsigned int*)&t.geomID;
- /* one pointer per instancing level */
- for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
- instID[l] = (unsigned int*)&t.instID[l];
- }
- /*! Reads the single ray located `offset` bytes behind each field pointer. */
- __forceinline Ray getRayByOffset(size_t offset)
- {
- Ray ray;
- ray.org.x = *(float* __restrict__)((char*)org_x + offset);
- ray.org.y = *(float* __restrict__)((char*)org_y + offset);
- ray.org.z = *(float* __restrict__)((char*)org_z + offset);
- ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
- ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
- ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
- ray.tfar = *(float* __restrict__)((char*)tfar + offset);
- ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f; // null tnear pointer: 0.0f is used
- ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f; // null time pointer: 0.0f is used
- ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1; // null mask/id/flags pointers: -1 is used
- ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
- ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
- return ray;
- }
- /*! Masked vector variant: loads K rays starting `offset` bytes behind each field pointer. */
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
- {
- RayK<K> ray;
- ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
- ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
- ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
- ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
- ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
- ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
- ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
- ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f; // same null-pointer defaults as the scalar overload
- ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
- ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
- ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
- ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
- return ray;
- }
- /*! Masked load of only the direction components of K rays. */
- template<int K>
- __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
- {
- Vec3vf<K> dir;
- dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
- dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
- dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
- return dir;
- }
- /*! Writes the hit of a single ray back to the stream; does nothing when geomID is the invalid marker. */
- __forceinline void setHitByOffset(size_t offset, const RayHit& ray)
- {
- if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
- {
- *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
- /* the Ng pointers may be null, in which case the normal component is skipped */
- if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
- if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
- if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
- *(float* __restrict__)((char*)u + offset) = ray.u;
- *(float* __restrict__)((char*)v + offset) = ray.v;
- *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
- *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
- /* instance IDs are written only when instID[0] is non-null; deeper levels are not null-checked */
- if (likely(instID[0])) {
- *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l)
- *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
-#endif
- }
- }
- }
- /*! Writes tfar of a single ray back to the stream unconditionally. */
- __forceinline void setHitByOffset(size_t offset, const Ray& ray)
- {
- *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
- }
- /*! Masked vector variant of the RayHit write-back for K rays starting at byte offset `offset`. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); // keep only lanes whose geomID is not the invalid marker
-
- if (likely(any(valid)))
- {
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
-
- if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
- if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
- if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
-
- if (likely(instID[0])) {
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
-#endif
- }
- }
- }
- /*! Masked write-back of tfar only, restricted to lanes whose tfar is negative. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.tfar < 0.0f);
-
- if (likely(any(valid)))
- vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
- }
- /*! Returns a 3-bit index built from the signs of the ray direction: 1 for negative x, 2 for negative y, 4 for negative z. */
- __forceinline size_t getOctantByOffset(size_t offset)
- {
- const float dx = *(float* __restrict__)((char*)dir_x + offset);
- const float dy = *(float* __restrict__)((char*)dir_y + offset);
- const float dz = *(float* __restrict__)((char*)dir_z + offset);
- const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
- return octantID;
- }
- /*! Returns true when tnear <= tfar for the ray at `offset`; a null tnear pointer counts as 0.0f. */
- __forceinline bool isValidByOffset(size_t offset)
- {
- const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
- const float ffar = *(float* __restrict__)((char*)tfar + offset);
- return nnear <= ffar;
- }
- /*! Vector variant of the tnear <= tfar test, returning one result per lane. */
- template<int K>
- __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
- {
- const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
- const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
- return nnear <= ffar;
- }
- /*! Builds a K-wide ray packet from per-lane offsets, masked by `valid`. */
- template<int K>
- __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
- {
- RayK<K> ray;
- /* AVX2 path uses gathers; the template argument 1 is presumably the byte scale applied to `offset` -- TODO confirm */
-#if defined(__AVX2__)
- ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
- ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
- ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
- ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
- ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
- ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
- ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
- ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
- ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
- ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
- ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
- ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
-#else
- ray.org = zero; // lanes that are not valid keep these zero values
- ray.tnear() = zero;
- ray.dir = zero;
- ray.tfar = zero;
- ray.time() = zero;
- ray.mask = zero;
- ray.id = zero;
- ray.flags = zero;
- /* scalar fallback: fetch each valid lane individually */
- for (size_t k = 0; k < K; k++)
- {
- if (likely(valid[k]))
- {
- const size_t ofs = offset[k];
- /* ofs is added to the field pointers as a byte offset */
- ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
- ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
- ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
- ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
- ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
- ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
- ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
- ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
- ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
- ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
- ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
- ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
- }
- }
-#endif
-
- return ray;
- }
- /*! Writes the hit data of each lane to that lane's offset; lanes whose geomID is the invalid marker are skipped. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
-
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
-
- if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
- if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
- if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
- vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
- vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
- vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
- vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
-
- if (likely(instID[0])) {
- vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
- vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
-#endif
- }
-#else
- size_t valid_bits = movemask(valid);
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits); // lane to process; the loop relies on bscf updating valid_bits (presumably it clears the bit it returns -- TODO confirm)
- const size_t ofs = offset[k];
-
- *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
-
- if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
- if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
- if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
- *(float* __restrict__)((char*)u + ofs) = ray.u[k];
- *(float* __restrict__)((char*)v + ofs) = ray.v[k];
- *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
- *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
-
- if (likely(instID[0])) {
- *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
- for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
- *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
-#endif
- }
- }
-#endif
- }
- }
- /*! Writes tfar of each lane to that lane's offset, restricted to lanes whose tfar is negative. */
- template<int K>
- __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
- {
- vbool<K> valid = valid_i;
- valid &= (ray.tfar < 0.0f);
-
- if (likely(any(valid)))
- {
-#if defined(__AVX512F__)
- vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
-#else
- size_t valid_bits = movemask(valid);
- while (valid_bits != 0)
- {
- const size_t k = bscf(valid_bits);
- const size_t ofs = offset[k];
-
- *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
- }
-#endif
- }
- }
-
- /* ray data */
- float* __restrict__ org_x; // x coordinate of ray origin
- float* __restrict__ org_y; // y coordinate of ray origin
- float* __restrict__ org_z; // z coordinate of ray origin
- float* __restrict__ tnear; // start of ray segment (optional)
-
- float* __restrict__ dir_x; // x coordinate of ray direction
- float* __restrict__ dir_y; // y coordinate of ray direction
- float* __restrict__ dir_z; // z coordinate of ray direction
- float* __restrict__ time; // time of this ray for motion blur (optional)
-
- float* __restrict__ tfar; // end of ray segment (set to hit distance)
- unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)
- unsigned int* __restrict__ id; // ray ID
- unsigned int* __restrict__ flags; // ray flags
-
- /* hit data */
- float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)
- float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)
- float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)
-
- float* __restrict__ u; // barycentric u coordinate of hit
- float* __restrict__ v; // barycentric v coordinate of hit
-
- unsigned int* __restrict__ primID; // primitive ID
- unsigned int* __restrict__ geomID; // geometry ID
- unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID (optional)
- };
-
-
- /*! Ray stream stored as an array of structures: `ptr` addresses the first ray
-     and individual rays are reached by adding a byte offset to it. */
- struct RayStreamAOS
- {
-   __forceinline RayStreamAOS(void* rays)
-     : ptr((Ray*)rays) {}
-
-   /*! Returns a reference to the ray located `offset` bytes behind `ptr`. */
-   __forceinline Ray& getRayByOffset(size_t offset)
-   {
-     return *(Ray*)((char*)ptr + offset);
-   }
-
-   /*! Loads one ray per lane from per-lane byte offsets; only declared here,
-       the definitions are explicit specializations per packet width. */
-   template<int K>
-   __forceinline RayK<K> getRayByOffset(const vint<K>& offset);
-
-   /*! Masked load: lanes that are not set in `valid` use offset 0 instead of
-       their own offset value, so that value is never used as an address. */
-   template<int K>
-   __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
-   {
-     /* the fallback operand must be K wide like `valid` and `offset`; the
-        native-width vintx only has that type when K is the native width */
-     const vint<K> valid_offset = select(valid, offset, vint<K>(zero));
-     return getRayByOffset(valid_offset);
-   }
-
-   /*! Writes the hit data of each lane to the ray at that lane's byte offset.
-       Lanes whose geomID is RTC_INVALID_GEOMETRY_ID are skipped. The stream
-       entries are accessed as RayHit objects here. */
-   template<int K>
-   __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
-
-     if (likely(any(valid)))
-     {
-#if defined(__AVX512F__)
-       vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
-       vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
-
-       /* instID level 0 is always stored; level l only while some stored lane
-          has a valid ID at level l-1 */
-       vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
-#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
-       for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
-         vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
-#endif
-#else
-       /* scalar fallback: visit the selected lanes one at a time */
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
-         ray_k->tfar = ray.tfar[k];
-         ray_k->Ng.x = ray.Ng.x[k];
-         ray_k->Ng.y = ray.Ng.y[k];
-         ray_k->Ng.z = ray.Ng.z[k];
-         ray_k->u = ray.u[k];
-         ray_k->v = ray.v[k];
-         ray_k->primID = ray.primID[k];
-         ray_k->geomID = ray.geomID[k];
-
-         instance_id_stack::copy(ray.instID, ray_k->instID, k);
-       }
-#endif
-     }
-   }
-
-   /*! Writes tfar of each lane to the ray at that lane's byte offset, but only
-       for lanes whose tfar is negative. */
-   template<int K>
-   __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.tfar < 0.0f);
-
-     if (likely(any(valid)))
-     {
-#if defined(__AVX512F__)
-       vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
-#else
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
-         ray_k->tfar = ray.tfar[k];
-       }
-#endif
-     }
-   }
-
-   Ray* __restrict__ ptr; // first ray of the stream; byte offsets are added to this address
- };
-
- /*! 4-wide specialization: reads the four rays addressed by the byte offsets
-     in `offset` and transposes their fields into a Ray4 packet. */
- template<>
- __forceinline Ray4 RayStreamAOS::getRayByOffset(const vint4& offset)
- {
-   /* resolve the four source rays once */
-   Ray* const src0 = (Ray*)((char*)ptr + offset[0]);
-   Ray* const src1 = (Ray*)((char*)ptr + offset[1]);
-   Ray* const src2 = (Ray*)((char*)ptr + offset[2]);
-   Ray* const src3 = (Ray*)((char*)ptr + offset[3]);
-
-   Ray4 packet;
-
-   /* load and transpose: org.x, org.y, org.z, tnear */
-   const vfloat4 org0 = vfloat4::loadu(&src0->org);
-   const vfloat4 org1 = vfloat4::loadu(&src1->org);
-   const vfloat4 org2 = vfloat4::loadu(&src2->org);
-   const vfloat4 org3 = vfloat4::loadu(&src3->org);
-   transpose(org0,org1,org2,org3, packet.org.x, packet.org.y, packet.org.z, packet.tnear());
-
-   /* load and transpose: dir.x, dir.y, dir.z, time */
-   const vfloat4 dir0 = vfloat4::loadu(&src0->dir);
-   const vfloat4 dir1 = vfloat4::loadu(&src1->dir);
-   const vfloat4 dir2 = vfloat4::loadu(&src2->dir);
-   const vfloat4 dir3 = vfloat4::loadu(&src3->dir);
-   transpose(dir0,dir1,dir2,dir3, packet.dir.x, packet.dir.y, packet.dir.z, packet.time());
-
-   /* load and transpose: tfar, mask, id, flags */
-   const vfloat4 far0 = vfloat4::loadu(&src0->tfar);
-   const vfloat4 far1 = vfloat4::loadu(&src1->tfar);
-   const vfloat4 far2 = vfloat4::loadu(&src2->tfar);
-   const vfloat4 far3 = vfloat4::loadu(&src3->tfar);
-
-   /* the integer fields pass through the float transpose and are reinterpreted afterwards */
-   vfloat4 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3, packet.tfar, mask_bits, id_bits, flags_bits);
-   packet.mask = asInt(mask_bits);
-   packet.id = asInt(id_bits);
-   packet.flags = asInt(flags_bits);
-
-   return packet;
- }
-
-#if defined(__AVX__)
- /*! 8-wide specialization: reads the eight rays addressed by the byte offsets
-     in `offset` and transposes their fields into a Ray8 packet. */
- template<>
- __forceinline Ray8 RayStreamAOS::getRayByOffset(const vint8& offset)
- {
-   /* resolve the eight source rays once */
-   Ray* const src0 = (Ray*)((char*)ptr + offset[0]);
-   Ray* const src1 = (Ray*)((char*)ptr + offset[1]);
-   Ray* const src2 = (Ray*)((char*)ptr + offset[2]);
-   Ray* const src3 = (Ray*)((char*)ptr + offset[3]);
-   Ray* const src4 = (Ray*)((char*)ptr + offset[4]);
-   Ray* const src5 = (Ray*)((char*)ptr + offset[5]);
-   Ray* const src6 = (Ray*)((char*)ptr + offset[6]);
-   Ray* const src7 = (Ray*)((char*)ptr + offset[7]);
-
-   Ray8 packet;
-
-   /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
-   const vfloat8 head0 = vfloat8::loadu(&src0->org);
-   const vfloat8 head1 = vfloat8::loadu(&src1->org);
-   const vfloat8 head2 = vfloat8::loadu(&src2->org);
-   const vfloat8 head3 = vfloat8::loadu(&src3->org);
-   const vfloat8 head4 = vfloat8::loadu(&src4->org);
-   const vfloat8 head5 = vfloat8::loadu(&src5->org);
-   const vfloat8 head6 = vfloat8::loadu(&src6->org);
-   const vfloat8 head7 = vfloat8::loadu(&src7->org);
-   transpose(head0,head1,head2,head3,head4,head5,head6,head7,
-             packet.org.x, packet.org.y, packet.org.z, packet.tnear(), packet.dir.x, packet.dir.y, packet.dir.z, packet.time());
-
-   /* load and transpose: tfar, mask, id, flags */
-   const vfloat4 tail0 = vfloat4::loadu(&src0->tfar);
-   const vfloat4 tail1 = vfloat4::loadu(&src1->tfar);
-   const vfloat4 tail2 = vfloat4::loadu(&src2->tfar);
-   const vfloat4 tail3 = vfloat4::loadu(&src3->tfar);
-   const vfloat4 tail4 = vfloat4::loadu(&src4->tfar);
-   const vfloat4 tail5 = vfloat4::loadu(&src5->tfar);
-   const vfloat4 tail6 = vfloat4::loadu(&src6->tfar);
-   const vfloat4 tail7 = vfloat4::loadu(&src7->tfar);
-
-   /* the integer fields pass through the float transpose and are reinterpreted afterwards */
-   vfloat8 mask_bits, id_bits, flags_bits;
-   transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7, packet.tfar, mask_bits, id_bits, flags_bits);
-   packet.mask = asInt(mask_bits);
-   packet.id = asInt(id_bits);
-   packet.flags = asInt(flags_bits);
-
-   return packet;
- }
-#endif
-
-#if defined(__AVX512F__)
- /*! 16-wide specialization: reads the sixteen rays addressed by the byte
-     offsets in `offset` and transposes their fields into a Ray16 packet. */
- template<>
- __forceinline Ray16 RayStreamAOS::getRayByOffset(const vint16& offset)
- {
-   /* resolve the sixteen source rays once */
-   Ray* const src0 = (Ray*)((char*)ptr + offset[ 0]);
-   Ray* const src1 = (Ray*)((char*)ptr + offset[ 1]);
-   Ray* const src2 = (Ray*)((char*)ptr + offset[ 2]);
-   Ray* const src3 = (Ray*)((char*)ptr + offset[ 3]);
-   Ray* const src4 = (Ray*)((char*)ptr + offset[ 4]);
-   Ray* const src5 = (Ray*)((char*)ptr + offset[ 5]);
-   Ray* const src6 = (Ray*)((char*)ptr + offset[ 6]);
-   Ray* const src7 = (Ray*)((char*)ptr + offset[ 7]);
-   Ray* const src8 = (Ray*)((char*)ptr + offset[ 8]);
-   Ray* const src9 = (Ray*)((char*)ptr + offset[ 9]);
-   Ray* const src10 = (Ray*)((char*)ptr + offset[10]);
-   Ray* const src11 = (Ray*)((char*)ptr + offset[11]);
-   Ray* const src12 = (Ray*)((char*)ptr + offset[12]);
-   Ray* const src13 = (Ray*)((char*)ptr + offset[13]);
-   Ray* const src14 = (Ray*)((char*)ptr + offset[14]);
-   Ray* const src15 = (Ray*)((char*)ptr + offset[15]);
-
-   Ray16 packet;
-
-   /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
-   const vfloat8 head0 = vfloat8::loadu(&src0->org);
-   const vfloat8 head1 = vfloat8::loadu(&src1->org);
-   const vfloat8 head2 = vfloat8::loadu(&src2->org);
-   const vfloat8 head3 = vfloat8::loadu(&src3->org);
-   const vfloat8 head4 = vfloat8::loadu(&src4->org);
-   const vfloat8 head5 = vfloat8::loadu(&src5->org);
-   const vfloat8 head6 = vfloat8::loadu(&src6->org);
-   const vfloat8 head7 = vfloat8::loadu(&src7->org);
-   const vfloat8 head8 = vfloat8::loadu(&src8->org);
-   const vfloat8 head9 = vfloat8::loadu(&src9->org);
-   const vfloat8 head10 = vfloat8::loadu(&src10->org);
-   const vfloat8 head11 = vfloat8::loadu(&src11->org);
-   const vfloat8 head12 = vfloat8::loadu(&src12->org);
-   const vfloat8 head13 = vfloat8::loadu(&src13->org);
-   const vfloat8 head14 = vfloat8::loadu(&src14->org);
-   const vfloat8 head15 = vfloat8::loadu(&src15->org);
-   transpose(head0,head1,head2,head3,head4,head5,head6,head7,head8,head9,head10,head11,head12,head13,head14,head15,
-             packet.org.x, packet.org.y, packet.org.z, packet.tnear(), packet.dir.x, packet.dir.y, packet.dir.z, packet.time());
-
-   /* load and transpose: tfar, mask, id, flags */
-   const vfloat4 tail0 = vfloat4::loadu(&src0->tfar);
-   const vfloat4 tail1 = vfloat4::loadu(&src1->tfar);
-   const vfloat4 tail2 = vfloat4::loadu(&src2->tfar);
-   const vfloat4 tail3 = vfloat4::loadu(&src3->tfar);
-   const vfloat4 tail4 = vfloat4::loadu(&src4->tfar);
-   const vfloat4 tail5 = vfloat4::loadu(&src5->tfar);
-   const vfloat4 tail6 = vfloat4::loadu(&src6->tfar);
-   const vfloat4 tail7 = vfloat4::loadu(&src7->tfar);
-   const vfloat4 tail8 = vfloat4::loadu(&src8->tfar);
-   const vfloat4 tail9 = vfloat4::loadu(&src9->tfar);
-   const vfloat4 tail10 = vfloat4::loadu(&src10->tfar);
-   const vfloat4 tail11 = vfloat4::loadu(&src11->tfar);
-   const vfloat4 tail12 = vfloat4::loadu(&src12->tfar);
-   const vfloat4 tail13 = vfloat4::loadu(&src13->tfar);
-   const vfloat4 tail14 = vfloat4::loadu(&src14->tfar);
-   const vfloat4 tail15 = vfloat4::loadu(&src15->tfar);
-
-   /* the integer fields pass through the float transpose and are reinterpreted afterwards */
-   vfloat16 mask_bits, id_bits, flags_bits;
-   transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,tail8,tail9,tail10,tail11,tail12,tail13,tail14,tail15,
-             packet.tfar, mask_bits, id_bits, flags_bits);
-   packet.mask = asInt(mask_bits);
-   packet.id = asInt(id_bits);
-   packet.flags = asInt(flags_bits);
-
-   return packet;
- }
-#endif
-
-
- /*! Ray stream stored as an array of pointers: `ptr[i]` addresses ray i. */
- struct RayStreamAOP
- {
-   __forceinline RayStreamAOP(void* rays)
-     : ptr((Ray**)rays) {}
-
-   /*! Returns a reference to the ray that entry `index` of the pointer array refers to. */
-   __forceinline Ray& getRayByIndex(size_t index)
-   {
-     return *ptr[index];
-   }
-
-   /*! Loads one ray per lane from per-lane indices; only declared here, the
-       definitions are explicit specializations per packet width. */
-   template<int K>
-   __forceinline RayK<K> getRayByIndex(const vint<K>& index);
-
-   /*! Masked load: lanes that are not set in `valid` use index 0 instead of
-       their own index value, so that value is never used to index `ptr`. */
-   template<int K>
-   __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
-   {
-     /* the fallback operand must be K wide like `valid` and `index`; the
-        native-width vintx only has that type when K is the native width */
-     const vint<K> valid_index = select(valid, index, vint<K>(zero));
-     return getRayByIndex(valid_index);
-   }
-
-   /*! Writes the hit data of each lane to the ray selected by that lane's index.
-       Lanes whose geomID is RTC_INVALID_GEOMETRY_ID are skipped. The stream
-       entries are accessed as RayHit objects here. */
-   template<int K>
-   __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
-
-     if (likely(any(valid)))
-     {
-       /* visit the selected lanes one at a time */
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
-
-         ray_k->tfar = ray.tfar[k];
-         ray_k->Ng.x = ray.Ng.x[k];
-         ray_k->Ng.y = ray.Ng.y[k];
-         ray_k->Ng.z = ray.Ng.z[k];
-         ray_k->u = ray.u[k];
-         ray_k->v = ray.v[k];
-         ray_k->primID = ray.primID[k];
-         ray_k->geomID = ray.geomID[k];
-         instance_id_stack::copy(ray.instID, ray_k->instID, k);
-       }
-     }
-   }
-
-   /*! Writes tfar of each lane to the ray selected by that lane's index, but
-       only for lanes whose tfar is negative. */
-   template<int K>
-   __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
-   {
-     vbool<K> valid = valid_i;
-     valid &= (ray.tfar < 0.0f);
-
-     if (likely(any(valid)))
-     {
-       size_t valid_bits = movemask(valid);
-       while (valid_bits != 0)
-       {
-         const size_t k = bscf(valid_bits);
-         Ray* __restrict__ ray_k = ptr[index[k]];
-
-         ray_k->tfar = ray.tfar[k];
-       }
-     }
-   }
-
-   Ray** __restrict__ ptr; // array of ray pointers, indexed by ray index
- };
-
- /*! 4-wide specialization: reads the four rays selected by the indices in
-     `index` and transposes their fields into a Ray4 packet. */
- template<>
- __forceinline Ray4 RayStreamAOP::getRayByIndex(const vint4& index)
- {
-   /* look up the four source rays once */
-   Ray* const src0 = ptr[index[0]];
-   Ray* const src1 = ptr[index[1]];
-   Ray* const src2 = ptr[index[2]];
-   Ray* const src3 = ptr[index[3]];
-
-   Ray4 packet;
-
-   /* load and transpose: org.x, org.y, org.z, tnear */
-   const vfloat4 org0 = vfloat4::loadu(&src0->org);
-   const vfloat4 org1 = vfloat4::loadu(&src1->org);
-   const vfloat4 org2 = vfloat4::loadu(&src2->org);
-   const vfloat4 org3 = vfloat4::loadu(&src3->org);
-   transpose(org0,org1,org2,org3, packet.org.x, packet.org.y, packet.org.z, packet.tnear());
-
-   /* load and transpose: dir.x, dir.y, dir.z, time */
-   const vfloat4 dir0 = vfloat4::loadu(&src0->dir);
-   const vfloat4 dir1 = vfloat4::loadu(&src1->dir);
-   const vfloat4 dir2 = vfloat4::loadu(&src2->dir);
-   const vfloat4 dir3 = vfloat4::loadu(&src3->dir);
-   transpose(dir0,dir1,dir2,dir3, packet.dir.x, packet.dir.y, packet.dir.z, packet.time());
-
-   /* load and transpose: tfar, mask, id, flags */
-   const vfloat4 far0 = vfloat4::loadu(&src0->tfar);
-   const vfloat4 far1 = vfloat4::loadu(&src1->tfar);
-   const vfloat4 far2 = vfloat4::loadu(&src2->tfar);
-   const vfloat4 far3 = vfloat4::loadu(&src3->tfar);
-
-   /* the integer fields pass through the float transpose and are reinterpreted afterwards */
-   vfloat4 mask_bits, id_bits, flags_bits;
-   transpose(far0,far1,far2,far3, packet.tfar, mask_bits, id_bits, flags_bits);
-   packet.mask = asInt(mask_bits);
-   packet.id = asInt(id_bits);
-   packet.flags = asInt(flags_bits);
-
-   return packet;
- }
-
-#if defined(__AVX__)
- template<>
- __forceinline Ray8 RayStreamAOP::getRayByIndex(const vint8& index)
- {
- Ray8 ray;
-
- /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
- const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
- const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
- const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
- const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
- const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
- const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
- const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
- const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
-
- transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
- /* load and transpose: tfar, mask, id, flags */
- const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
- const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
- const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
- const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
- const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
- const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
- const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
- const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
-
- vfloat8 maskf, idf, flagsf;
- transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
- ray.mask = asInt(maskf);
- ray.id = asInt(idf);
- ray.flags = asInt(flagsf);
-
- return ray;
- }
-#endif
-
-#if defined(__AVX512F__)
- template<>
- __forceinline Ray16 RayStreamAOP::getRayByIndex(const vint16& index)
- {
- Ray16 ray;
-
- /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
- const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
- const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
- const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
- const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
- const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
- const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
- const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
- const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
- const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org);
- const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org);
- const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org);
- const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org);
- const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org);
- const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org);
- const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org);
- const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org);
-
- transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
- ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
-
- /* load and transpose: tfar, mask, id, flags */
- const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
- const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
- const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
- const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
- const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
- const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
- const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
- const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
- const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar);
- const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar);
- const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar);
- const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar);
- const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar);
- const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar);
- const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar);
- const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar);
-
- vfloat16 maskf, idf, flagsf;
- transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
- ray.tfar, maskf, idf, flagsf);
-
- ray.mask = asInt(maskf);
- ray.id = asInt(idf);
- ray.flags = asInt(flagsf);
-
- return ray;
- }
-#endif
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore.cpp
deleted file mode 100644
index 625fbf6d4f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp
+++ /dev/null
@@ -1,1799 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#define RTC_EXPORT_API
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "context.h"
-#include "../../include/embree3/rtcore_ray.h"
-
-#if defined(__aarch64__) && defined(BUILD_IOS)
-#include <mutex>
-#endif
-
-using namespace embree;
-
-RTC_NAMESPACE_BEGIN;
-
- /* mutex to make API thread safe */
-#if defined(__aarch64__) && defined(BUILD_IOS)
- static std::mutex g_mutex;
-#else
- static MutexSys g_mutex;
-#endif
-
- RTC_API RTCDevice rtcNewDevice(const char* config)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewDevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- Device* device = new Device(config);
- return (RTCDevice) device->refInc();
- RTC_CATCH_END(nullptr);
- return (RTCDevice) nullptr;
- }
-
- RTC_API void rtcRetainDevice(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainDevice);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->refInc();
- RTC_CATCH_END(nullptr);
- }
-
- RTC_API void rtcReleaseDevice(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseDevice);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->refDec();
- RTC_CATCH_END(nullptr);
- }
-
- RTC_API ssize_t rtcGetDeviceProperty(RTCDevice hdevice, RTCDeviceProperty prop)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetDeviceProperty);
- RTC_VERIFY_HANDLE(hdevice);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- return device->getProperty(prop);
- RTC_CATCH_END(device);
- return 0;
- }
-
- RTC_API void rtcSetDeviceProperty(RTCDevice hdevice, const RTCDeviceProperty prop, ssize_t val)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceProperty);
- const bool internal_prop = (size_t)prop >= 1000000 && (size_t)prop < 1000004;
- if (!internal_prop) RTC_VERIFY_HANDLE(hdevice); // allow NULL device for special internal settings
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- device->setProperty(prop,val);
- RTC_CATCH_END(device);
- }
-
- RTC_API RTCError rtcGetDeviceError(RTCDevice hdevice)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetDeviceError);
- if (device == nullptr) return Device::getThreadErrorCode();
- else return device->getDeviceErrorCode();
- RTC_CATCH_END(device);
- return RTC_ERROR_UNKNOWN;
- }
-
- RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceErrorFunction);
- RTC_VERIFY_HANDLE(hdevice);
- device->setErrorFunction(error, userPtr);
- RTC_CATCH_END(device);
- }
-
- RTC_API void rtcSetDeviceMemoryMonitorFunction(RTCDevice hdevice, RTCMemoryMonitorFunction memoryMonitor, void* userPtr)
- {
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetDeviceMemoryMonitorFunction);
- device->setMemoryMonitorFunction(memoryMonitor, userPtr);
- RTC_CATCH_END(device);
- }
-
- RTC_API RTCBuffer rtcNewBuffer(RTCDevice hdevice, size_t byteSize)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewBuffer);
- RTC_VERIFY_HANDLE(hdevice);
- Buffer* buffer = new Buffer((Device*)hdevice, byteSize);
- return (RTCBuffer)buffer->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice hdevice, void* ptr, size_t byteSize)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewSharedBuffer);
- RTC_VERIFY_HANDLE(hdevice);
- Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr);
- return (RTCBuffer)buffer->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API void* rtcGetBufferData(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetBufferData);
- RTC_VERIFY_HANDLE(hbuffer);
- return buffer->data();
- RTC_CATCH_END2(buffer);
- return nullptr;
- }
-
- RTC_API void rtcRetainBuffer(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainBuffer);
- RTC_VERIFY_HANDLE(hbuffer);
- buffer->refInc();
- RTC_CATCH_END2(buffer);
- }
-
- RTC_API void rtcReleaseBuffer(RTCBuffer hbuffer)
- {
- Buffer* buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseBuffer);
- RTC_VERIFY_HANDLE(hbuffer);
- buffer->refDec();
- RTC_CATCH_END2(buffer);
- }
-
- RTC_API RTCScene rtcNewScene (RTCDevice hdevice)
- {
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewScene);
- RTC_VERIFY_HANDLE(hdevice);
- Scene* scene = new Scene((Device*)hdevice);
- return (RTCScene) scene->refInc();
- RTC_CATCH_END((Device*)hdevice);
- return nullptr;
- }
-
- RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneDevice);
- RTC_VERIFY_HANDLE(hscene);
- return (RTCDevice)scene->device->refInc(); // user will own one additional device reference
- RTC_CATCH_END2(scene);
- return (RTCDevice)nullptr;
- }
-
- RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneProgressMonitorFunction);
- RTC_VERIFY_HANDLE(hscene);
-#if defined(__aarch64__) && defined(BUILD_IOS)
- std::scoped_lock lock(g_mutex);
-#else
- Lock<MutexSys> lock(g_mutex);
-#endif
- scene->setProgressMonitorFunction(progress,ptr);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetSceneBuildQuality (RTCScene hscene, RTCBuildQuality quality)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneBuildQuality);
- RTC_VERIFY_HANDLE(hscene);
- if (quality != RTC_BUILD_QUALITY_LOW &&
- quality != RTC_BUILD_QUALITY_MEDIUM &&
- quality != RTC_BUILD_QUALITY_HIGH)
- // -- GODOT start --
- // throw std::runtime_error("invalid build quality");
- abort();
- // -- GODOT end --
- scene->setBuildQuality(quality);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetSceneFlags (RTCScene hscene, RTCSceneFlags flags)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSceneFlags);
- RTC_VERIFY_HANDLE(hscene);
- scene->setSceneFlags(flags);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API RTCSceneFlags rtcGetSceneFlags(RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneFlags);
- RTC_VERIFY_HANDLE(hscene);
- return scene->getSceneFlags();
- RTC_CATCH_END2(scene);
- return RTC_SCENE_FLAG_NONE;
- }
-
- RTC_API void rtcCommitScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcCommitScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->commit(false);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcJoinCommitScene (RTCScene hscene)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcJoinCommitScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->commit(true);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcGetSceneBounds(RTCScene hscene, RTCBounds* bounds_o)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneBounds);
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- BBox3fa bounds = scene->bounds.bounds();
- bounds_o->lower_x = bounds.lower.x;
- bounds_o->lower_y = bounds.lower.y;
- bounds_o->lower_z = bounds.lower.z;
- bounds_o->align0 = 0;
- bounds_o->upper_x = bounds.upper.x;
- bounds_o->upper_y = bounds.upper.y;
- bounds_o->upper_z = bounds.upper.z;
- bounds_o->align1 = 0;
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcGetSceneLinearBounds(RTCScene hscene, RTCLinearBounds* bounds_o)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetSceneBounds);
- RTC_VERIFY_HANDLE(hscene);
- if (bounds_o == nullptr)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid destination pointer");
- if (scene->isModified())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
-
- bounds_o->bounds0.lower_x = scene->bounds.bounds0.lower.x;
- bounds_o->bounds0.lower_y = scene->bounds.bounds0.lower.y;
- bounds_o->bounds0.lower_z = scene->bounds.bounds0.lower.z;
- bounds_o->bounds0.align0 = 0;
- bounds_o->bounds0.upper_x = scene->bounds.bounds0.upper.x;
- bounds_o->bounds0.upper_y = scene->bounds.bounds0.upper.y;
- bounds_o->bounds0.upper_z = scene->bounds.bounds0.upper.z;
- bounds_o->bounds0.align1 = 0;
- bounds_o->bounds1.lower_x = scene->bounds.bounds1.lower.x;
- bounds_o->bounds1.lower_y = scene->bounds.bounds1.lower.y;
- bounds_o->bounds1.lower_z = scene->bounds.bounds1.lower.z;
- bounds_o->bounds1.align0 = 0;
- bounds_o->bounds1.upper_x = scene->bounds.bounds1.upper.x;
- bounds_o->bounds1.upper_y = scene->bounds.bounds1.upper.y;
- bounds_o->bounds1.upper_z = scene->bounds.bounds1.upper.z;
- bounds_o->bounds1.align1 = 0;
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcCollide (RTCScene hscene0, RTCScene hscene1, RTCCollideFunc callback, void* userPtr)
- {
- Scene* scene0 = (Scene*) hscene0;
- Scene* scene1 = (Scene*) hscene1;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcCollide);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene0);
- RTC_VERIFY_HANDLE(hscene1);
- if (scene0->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (scene1->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (scene0->device != scene1->device) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes are from different devices");
- auto nUserPrims0 = scene0->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false);
- auto nUserPrims1 = scene1->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false);
- if (scene0->numPrimitives() != nUserPrims0 && scene1->numPrimitives() != nUserPrims1) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes must only contain user geometries with a single timestep");
-#endif
- scene0->intersectors.collide(scene0,scene1,callback,userPtr);
- RTC_CATCH_END(scene0->device);
- }
-
- inline bool pointQuery(Scene* scene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
- {
- bool changed = false;
- if (userContext->instStackSize > 0)
- {
- const AffineSpace3fa transform = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
-
- float similarityScale = 0.f;
- const bool similtude = similarityTransform(transform, &similarityScale);
- assert((similtude && similarityScale > 0) || (!similtude && similarityScale == 0.f));
-
- PointQuery query_inst;
- query_inst.p = xfmPoint(transform, Vec3fa(query->x, query->y, query->z));
- query_inst.radius = query->radius * similarityScale;
- query_inst.time = query->time;
-
- PointQueryContext context_inst(scene, (PointQuery*)query,
- similtude ? POINT_QUERY_TYPE_SPHERE : POINT_QUERY_TYPE_AABB,
- queryFunc, userContext, similarityScale, userPtr);
- changed = scene->intersectors.pointQuery((PointQuery*)&query_inst, &context_inst);
- }
- else
- {
- PointQueryContext context(scene, (PointQuery*)query,
- POINT_QUERY_TYPE_SPHERE, queryFunc, userContext, 1.f, userPtr);
- changed = scene->intersectors.pointQuery((PointQuery*)query, &context);
- }
- return changed;
- }
-
- RTC_API bool rtcPointQuery(RTCScene hscene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(userContext);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
- if (((size_t)userContext) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "context not aligned to 16 bytes");
-#endif
-
- return pointQuery(scene, query, userContext, queryFunc, userPtr);
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery4 (const int* valid, RTCScene hscene, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery4);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery4* query4 = (PointQuery4*)query;
- PointQuery query1;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue;
- query4->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query4->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery8 (const int* valid, RTCScene hscene, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery8);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery8* query8 = (PointQuery8*)query;
- PointQuery query1;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue;
- query8->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query8->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API bool rtcPointQuery16 (const int* valid, RTCScene hscene, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcPointQuery16);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(point_query.travs,cnt,cnt,cnt);
-
- bool changed = false;
- PointQuery16* query16 = (PointQuery16*)query;
- PointQuery query1;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue;
- PointQuery query1; query16->get(i,query1);
- changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL);
- query16->set(i,query1);
- }
- return changed;
- RTC_CATCH_END2_FALSE(scene);
- }
-
- RTC_API void rtcIntersect1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- STAT3(normal.travs,1,1,1);
- IntersectContext context(scene,user_context);
- scene->intersectors.intersect(*rayhit,&context);
-#if defined(DEBUG)
- ((RayHit*)rayhit)->verifyHit();
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit4* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect4);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit4* rayhit4 = (RayHit4*)rayhit;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit4->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit4->set(i,ray1);
- }
-#else
- scene->intersectors.intersect4(valid,*rayhit,&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit8* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect8);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes");
- if (((size_t)rayhit) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 32 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit8* rayhit8 = (RayHit8*) rayhit;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit8->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit8->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector8))
- scene->intersectors.intersect8(valid,*rayhit,&context);
- else
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,8,1,sizeof(RTCRayHit8),&context);
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit16* rayhit)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect16);
-
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes");
- if (((size_t)rayhit) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 64 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;);
- STAT3(normal.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS)
- RayHit16* rayhit16 = (RayHit16*) rayhit;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue;
- RayHit ray1; rayhit16->get(i,ray1);
- scene->intersectors.intersect((RTCRayHit&)ray1,&context);
- rayhit16->set(i,ray1);
- }
-#else
- if (likely(scene->intersectors.intersector16))
- scene->intersectors.intersect16(valid,*rayhit,&context);
- else
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,16,1,sizeof(RTCRayHit16),&context);
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect1M (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1M);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit ) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for single rays */
- if (likely(M == 1)) {
- if (likely(rayhit->ray.tnear <= rayhit->ray.tfar))
- scene->intersectors.intersect(*rayhit,&context);
- }
-
- /* codepath for streams */
- else {
- scene->device->rayStreamFilters.intersectAOS(scene,rayhit,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1M not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersect1Mp (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit** rn, unsigned int M)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersect1Mp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rn) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for single rays */
- if (likely(M == 1)) {
- if (likely(rn[0]->ray.tnear <= rn[0]->ray.tfar))
- scene->intersectors.intersect(*rn[0],&context);
- }
-
- /* codepath for streams */
- else {
- scene->device->rayStreamFilters.intersectAOP(scene,rn,M,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1Mp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersectNM (RTCScene hscene, RTCIntersectContext* user_context, struct RTCRayHitN* rayhit, unsigned int N, unsigned int M, size_t byteStride)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersectNM);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,N*M,N*M,N*M);
- IntersectContext context(scene,user_context);
-
- /* code path for single ray streams */
- if (likely(N == 1))
- {
- /* fast code path for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(((RTCRayHit*)rayhit)->ray.tnear <= ((RTCRayHit*)rayhit)->ray.tfar))
- scene->intersectors.intersect(*(RTCRayHit*)rayhit,&context);
- }
- /* normal codepath for single ray streams */
- else {
- scene->device->rayStreamFilters.intersectAOS(scene,(RTCRayHit*)rayhit,M,byteStride,&context);
- }
- }
- /* code path for ray packet streams */
- else {
- scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,N,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNM not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcIntersectNp (RTCScene hscene, RTCIntersectContext* user_context, const RTCRayHitNp* rayhit, unsigned int N)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcIntersectNp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)rayhit->ray.org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_y not aligned to 4 bytes");
- if (((size_t)rayhit->ray.org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_z not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_y not aligned to 4 bytes");
- if (((size_t)rayhit->ray.dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_z not aligned to 4 bytes");
- if (((size_t)rayhit->ray.tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes");
- if (((size_t)rayhit->ray.tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.tnear not aligned to 4 bytes");
- if (((size_t)rayhit->ray.time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.time not aligned to 4 bytes");
- if (((size_t)rayhit->ray.mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.mask not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_x not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_y not aligned to 4 bytes");
- if (((size_t)rayhit->hit.Ng_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_z not aligned to 4 bytes");
- if (((size_t)rayhit->hit.u ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.u not aligned to 4 bytes");
- if (((size_t)rayhit->hit.v ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.v not aligned to 4 bytes");
- if (((size_t)rayhit->hit.geomID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.geomID not aligned to 4 bytes");
- if (((size_t)rayhit->hit.primID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.primID not aligned to 4 bytes");
- if (((size_t)rayhit->hit.instID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.instID not aligned to 4 bytes");
-#endif
- STAT3(normal.travs,N,N,N);
- IntersectContext context(scene,user_context);
- scene->device->rayStreamFilters.intersectSOP(scene,rayhit,N,&context);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray)
- { // Runs the scene's single-ray occlusion query on `ray`.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1);
- STAT3(shadow.travs,1,1,1);
-#if defined(DEBUG) // the argument checks below are compiled in for DEBUG builds only
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- IntersectContext context(scene,user_context); // bundles the scene with the caller-supplied context
- scene->intersectors.occluded(*ray,&context);
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay4* ray)
- { // Occlusion query for a packet of 4 rays; `valid` holds one flag per lane.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded4);
-
-#if defined(DEBUG) // the argument checks below are compiled in for DEBUG builds only
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes");
- if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); // counts lanes whose flag equals -1
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS) // no packet support: trace every flagged lane as a single ray
- Ray4* ray4 = (Ray4*) ray;
- for (size_t i=0; i<4; i++) {
- if (!valid[i]) continue; // lanes with a zero flag are skipped
- Ray ray1; ray4->get(i,ray1); // extract lane i into a standalone ray
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray4->set(i,ray1); // copy the (possibly updated) ray back into lane i
- }
-#else
- scene->intersectors.occluded4(valid,*ray,&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay8* ray)
- { // Occlusion query for a packet of 8 rays; `valid` holds one flag per lane.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded8);
-
-#if defined(DEBUG) // the argument checks below are compiled in for DEBUG builds only
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes");
- if (((size_t)ray) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 32 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;); // counts lanes whose flag equals -1
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS) // no packet support: trace every flagged lane as a single ray
- Ray8* ray8 = (Ray8*) ray;
- for (size_t i=0; i<8; i++) {
- if (!valid[i]) continue; // lanes with a zero flag are skipped
- Ray ray1; ray8->get(i,ray1); // extract lane i into a standalone ray
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray8->set(i,ray1); // copy the (possibly updated) ray back into lane i
- }
-#else
- if (likely(scene->intersectors.intersector8)) // use the 8-wide intersector when one is set
- scene->intersectors.occluded8(valid,*ray,&context);
- else // otherwise hand the packet to the stream filter as one SOA packet of width 8
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,8,1,sizeof(RTCRay8),&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay16* ray)
- { // Occlusion query for a packet of 16 rays; `valid` holds one flag per lane.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded16);
-
-#if defined(DEBUG) // the argument checks below are compiled in for DEBUG builds only
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes");
- if (((size_t)ray) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 64 bytes");
-#endif
- STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;); // counts lanes whose flag equals -1
- STAT3(shadow.travs,cnt,cnt,cnt);
-
- IntersectContext context(scene,user_context);
-#if !defined(EMBREE_RAY_PACKETS) // no packet support: trace every flagged lane as a single ray
- Ray16* ray16 = (Ray16*) ray;
- for (size_t i=0; i<16; i++) {
- if (!valid[i]) continue; // lanes with a zero flag are skipped
- Ray ray1; ray16->get(i,ray1); // extract lane i into a standalone ray
- scene->intersectors.occluded((RTCRay&)ray1,&context);
- ray16->set(i,ray1); // copy the (possibly updated) ray back into lane i
- }
-#else
- if (likely(scene->intersectors.intersector16)) // use the 16-wide intersector when one is set
- scene->intersectors.occluded16(valid,*ray,&context);
- else // otherwise hand the packet to the stream filter as one SOA packet of width 16
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,16,1,sizeof(RTCRay16),&context);
-#endif
-
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1M(RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray, unsigned int M, size_t byteStride)
- { // Occlusion query for a stream of M single rays starting at `ray`, spaced byteStride apart.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1M);
-
-#if defined (EMBREE_RAY_PACKETS) // without EMBREE_RAY_PACKETS this entry point only raises an error
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,M,M,M);
- IntersectContext context(scene,user_context);
- /* fast codepath for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(ray->tnear <= ray->tfar)) // a ray with tnear > tfar is not traced at all
- scene->intersectors.occluded (*ray,&context);
- }
- /* codepath for normal streams */
- else {
- scene->device->rayStreamFilters.occludedAOS(scene,ray,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1M not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccluded1Mp(RTCScene hscene, RTCIntersectContext* user_context, RTCRay** ray, unsigned int M)
- { // Occlusion query for M rays given as an array of ray pointers.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccluded1Mp);
-
-#if defined (EMBREE_RAY_PACKETS) // without EMBREE_RAY_PACKETS this entry point only raises an error
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,M,M,M);
- IntersectContext context(scene,user_context);
-
- /* fast codepath for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(ray[0]->tnear <= ray[0]->tfar)) // a ray with tnear > tfar is not traced at all
- scene->intersectors.occluded (*ray[0],&context);
- }
- /* codepath for normal streams */
- else {
- scene->device->rayStreamFilters.occludedAOP(scene,ray,M,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1Mp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccludedNM(RTCScene hscene, RTCIntersectContext* user_context, RTCRayN* ray, unsigned int N, unsigned int M, size_t byteStride)
- { // Occlusion query for a stream of M packets of width N, spaced byteStride apart.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccludedNM);
-
-#if defined (EMBREE_RAY_PACKETS) // without EMBREE_RAY_PACKETS this entry point only raises an error
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (byteStride < sizeof(RTCRayHit)) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"byteStride too small"); // NOTE(review): compares against sizeof(RTCRayHit) although this function takes rays, not ray/hit pairs - confirm intended
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,N*M,N*N,N*N); // NOTE(review): first argument is N*M, the other two are N*N - confirm intended
- IntersectContext context(scene,user_context);
-
- /* codepath for single rays */
- if (likely(N == 1))
- {
- /* fast path for streams of size 1 */
- if (likely(M == 1)) {
- if (likely(((RTCRay*)ray)->tnear <= ((RTCRay*)ray)->tfar)) // a ray with tnear > tfar is not traced at all
- scene->intersectors.occluded (*(RTCRay*)ray,&context);
- }
- /* codepath for normal ray streams */
- else {
- scene->device->rayStreamFilters.occludedAOS(scene,(RTCRay*)ray,M,byteStride,&context);
- }
- }
- /* code path for ray packet streams */
- else {
- scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,N,M,byteStride,&context);
- }
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNM not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcOccludedNp(RTCScene hscene, RTCIntersectContext* user_context, const RTCRayNp* ray, unsigned int N)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcOccludedNp);
-
-#if defined (EMBREE_RAY_PACKETS)
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed");
- if (((size_t)ray->org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_x not aligned to 4 bytes");
- if (((size_t)ray->org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_y not aligned to 4 bytes");
- if (((size_t)ray->org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_z not aligned to 4 bytes");
- if (((size_t)ray->dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes");
- if (((size_t)ray->dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_y not aligned to 4 bytes");
- if (((size_t)ray->dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_z not aligned to 4 bytes");
- if (((size_t)ray->tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes");
- if (((size_t)ray->tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "tnear not aligned to 4 bytes");
- if (((size_t)ray->time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "time not aligned to 4 bytes");
- if (((size_t)ray->mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 4 bytes");
-#endif
- STAT3(shadow.travs,N,N,N);
- IntersectContext context(scene,user_context);
- scene->device->rayStreamFilters.occludedSOP(scene,ray,N,&context);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNp not supported");
-#endif
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcRetainScene (RTCScene hscene)
- { // Increments the reference count of the scene behind `hscene`.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->refInc();
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcReleaseScene (RTCScene hscene)
- { // Decrements the reference count of the scene behind `hscene`.
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseScene);
- RTC_VERIFY_HANDLE(hscene);
- scene->refDec();
- RTC_CATCH_END2(scene);
- }
-
- RTC_API void rtcSetGeometryInstancedScene(RTCGeometry hgeometry, RTCScene hscene)
- { // Passes the scene behind `hscene` to the geometry's setInstancedScene().
- Geometry* geometry = (Geometry*) hgeometry;
- Ref<Scene> scene = (Scene*) hscene; // held through a Ref<> for the duration of the call
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryInstancedScene);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(hscene);
- geometry->setInstancedScene(scene);
- RTC_CATCH_END2(geometry);
- }
-
- AffineSpace3fa loadTransform(RTCFormat format, const float* xfm)
- {
- /* Builds an affine transformation from the float array xfm. The layout of
- * xfm is selected by format: the first three vectors passed to the
- * AffineSpace3fa constructor form the linear part, the fourth the
- * translation. Any other format raises RTC_ERROR_INVALID_OPERATION. */
- switch (format)
- {
- case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
- /* 3 rows of 4 floats: each basis vector is gathered with a stride of 4 */
- return AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 4], xfm[ 8]),
- Vec3fa(xfm[ 1], xfm[ 5], xfm[ 9]),
- Vec3fa(xfm[ 2], xfm[ 6], xfm[10]),
- Vec3fa(xfm[ 3], xfm[ 7], xfm[11]));
-
- case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
- /* 4 columns of 3 floats stored back to back */
- return AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]),
- Vec3fa(xfm[ 3], xfm[ 4], xfm[ 5]),
- Vec3fa(xfm[ 6], xfm[ 7], xfm[ 8]),
- Vec3fa(xfm[ 9], xfm[10], xfm[11]));
-
- case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
- /* 4 columns of 4 floats: the fourth entry of every column is not read */
- return AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]),
- Vec3fa(xfm[ 4], xfm[ 5], xfm[ 6]),
- Vec3fa(xfm[ 8], xfm[ 9], xfm[10]),
- Vec3fa(xfm[12], xfm[13], xfm[14]));
-
- default:
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
- break;
- }
- /* only reached if the error macro above returns: yield the identity */
- AffineSpace3fa identity = one;
- return identity;
- }
-
- void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm)
- {
- /* Writes the affine transformation space into the float array xfm using
- * the layout selected by format (12 floats for the 3x4 formats, 16 for
- * the 4x4 format). Any other format raises RTC_ERROR_INVALID_OPERATION. */
- const Vec3fa& ax = space.l.vx; /* first basis vector */
- const Vec3fa& ay = space.l.vy; /* second basis vector */
- const Vec3fa& az = space.l.vz; /* third basis vector */
- const Vec3fa& tr = space.p; /* translation */
-
- switch (format)
- {
- case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
- xfm[ 0] = ax.x; xfm[ 1] = ay.x; xfm[ 2] = az.x; xfm[ 3] = tr.x;
- xfm[ 4] = ax.y; xfm[ 5] = ay.y; xfm[ 6] = az.y; xfm[ 7] = tr.y;
- xfm[ 8] = ax.z; xfm[ 9] = ay.z; xfm[10] = az.z; xfm[11] = tr.z;
- break;
-
- case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
- xfm[ 0] = ax.x; xfm[ 1] = ax.y; xfm[ 2] = ax.z;
- xfm[ 3] = ay.x; xfm[ 4] = ay.y; xfm[ 5] = ay.z;
- xfm[ 6] = az.x; xfm[ 7] = az.y; xfm[ 8] = az.z;
- xfm[ 9] = tr.x; xfm[10] = tr.y; xfm[11] = tr.z;
- break;
-
- case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
- /* the fourth entry of each column is filled with 0,0,0,1 */
- xfm[ 0] = ax.x; xfm[ 1] = ax.y; xfm[ 2] = ax.z; xfm[ 3] = 0.f;
- xfm[ 4] = ay.x; xfm[ 5] = ay.y; xfm[ 6] = ay.z; xfm[ 7] = 0.f;
- xfm[ 8] = az.x; xfm[ 9] = az.y; xfm[10] = az.z; xfm[11] = 0.f;
- xfm[12] = tr.x; xfm[13] = tr.y; xfm[14] = tr.z; xfm[15] = 1.f;
- break;
-
- default:
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
- break;
- }
- }
-
- RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeStep, RTCFormat format, const void* xfm)
- { // Decodes the matrix at `xfm` according to `format` and sets it as the geometry's transform for `timeStep`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTransform);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(xfm);
- const AffineSpace3fa transform = loadTransform(format, (const float*)xfm); // xfm is read as an array of floats
- geometry->setTransform(transform, timeStep);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTransformQuaternion(RTCGeometry hgeometry, unsigned int timeStep, const RTCQuaternionDecomposition* qd)
- { // Packs the quaternion decomposition `qd` into an AffineSpace3fx and sets it on the geometry for `timeStep`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTransformQuaternion);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(qd);
- // the slots of `transform` are used as storage for the decomposition, not as a plain matrix
- AffineSpace3fx transform;
- transform.l.vx.x = qd->scale_x; // scale goes on the diagonal
- transform.l.vy.y = qd->scale_y;
- transform.l.vz.z = qd->scale_z;
- transform.l.vy.x = qd->skew_xy; // skew goes into vy.x, vz.x, vz.y
- transform.l.vz.x = qd->skew_xz;
- transform.l.vz.y = qd->skew_yz;
- transform.l.vx.y = qd->translation_x; // translation goes into vx.y, vx.z, vy.z
- transform.l.vx.z = qd->translation_y;
- transform.l.vy.z = qd->translation_z;
- transform.p.x = qd->shift_x; // shift goes into p
- transform.p.y = qd->shift_y;
- transform.p.z = qd->shift_z;
-
- // normalize quaternion
- Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k);
- q = normalize(q);
- transform.l.vx.w = q.i; // the normalized quaternion is stored in the four w components
- transform.l.vy.w = q.j;
- transform.l.vz.w = q.k;
- transform.p.w = q.r;
-
- geometry->setQuaternionDecomposition(transform, timeStep);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcGetGeometryTransform(RTCGeometry hgeometry, float time, RTCFormat format, void* xfm)
- { // Queries the geometry's transform at `time` and writes it to `xfm` in the layout selected by `format`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryTransform);
- RTC_VERIFY_HANDLE(hgeometry); // reject a null handle before it is dereferenced, as rtcSetGeometryTransform does
- RTC_VERIFY_HANDLE(xfm); // reject a null output pointer before storeTransform writes through it
- const AffineSpace3fa transform = geometry->getTransform(time);
- storeTransform(transform, format, (float*)xfm); // xfm is written as an array of floats
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
- {
- /* reinterpret the public argument block as the internal one and invoke
- * its report callback with the filter arguments */
- IntersectFunctionNArguments* const internalArgs = (IntersectFunctionNArguments*) args_i;
- internalArgs->report(internalArgs,filter_args);
- }
-
- RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
- {
- /* reinterpret the public argument block as the internal one and invoke
- * its report callback with the filter arguments */
- OccludedFunctionNArguments* const internalArgs = (OccludedFunctionNArguments*) args_i;
- internalArgs->report(internalArgs,filter_args);
- }
-
- RTC_API RTCGeometry rtcNewGeometry (RTCDevice hdevice, RTCGeometryType type)
- { // Creates a geometry of the requested `type` on the device and returns its handle; returns nullptr after the catch macro.
- Device* device = (Device*) hdevice;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcNewGeometry);
- RTC_VERIFY_HANDLE(hdevice);
- // every case: resolve a create* function pointer via a SELECT_SYMBOL_* macro, call it, refInc() the result and return it
- switch (type)
- {
- case RTC_GEOMETRY_TYPE_TRIANGLE:
- {
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- createTriangleMeshTy createTriangleMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createTriangleMesh);
- Geometry* geom = createTriangleMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_TRIANGLE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_QUAD:
- {
-#if defined(EMBREE_GEOMETRY_QUAD)
- createQuadMeshTy createQuadMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createQuadMesh);
- Geometry* geom = createQuadMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_QUAD is not supported");
-#endif
- }
- // the three point types share one factory and differ only in the Geometry::GTY_* tag passed to it
- case RTC_GEOMETRY_TYPE_SPHERE_POINT:
- case RTC_GEOMETRY_TYPE_DISC_POINT:
- case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT:
- {
-#if defined(EMBREE_GEOMETRY_POINT)
- createPointsTy createPoints = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_builder_cpu_features, createPoints); // NOTE(review): uses enabled_builder_cpu_features while the other cases use enabled_cpu_features - confirm intended
-
- Geometry *geom;
- switch(type) {
- case RTC_GEOMETRY_TYPE_SPHERE_POINT:
- geom = createPoints(device, Geometry::GTY_SPHERE_POINT);
- break;
- case RTC_GEOMETRY_TYPE_DISC_POINT:
- geom = createPoints(device, Geometry::GTY_DISC_POINT);
- break;
- case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT:
- geom = createPoints(device, Geometry::GTY_ORIENTED_DISC_POINT);
- break;
- default: // not reachable from the three enclosing case labels
- geom = nullptr;
- break;
- }
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_POINT is not supported");
-#endif
- }
- // all curve types share this case: linear curves go to createLineSegments, the rest to createCurves
- case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE:
- case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE:
-
- case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE:
- case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE:
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE:
- {
-#if defined(EMBREE_GEOMETRY_CURVE)
- createLineSegmentsTy createLineSegments = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createLineSegments);
- createCurvesTy createCurves = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createCurves);
-
- Geometry* geom;
- switch (type) {
- case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_CONE_LINEAR_CURVE); break;
- case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ROUND_LINEAR_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_FLAT_LINEAR_CURVE); break;
- //case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ORIENTED_LINEAR_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BEZIER_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BEZIER_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BEZIER_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BSPLINE_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BSPLINE_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BSPLINE_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_HERMITE_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_HERMITE_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_HERMITE_CURVE); break;
-
- case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_CATMULL_ROM_CURVE); break;
- case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_CATMULL_ROM_CURVE); break;
- case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_CATMULL_ROM_CURVE); break;
- default: geom = nullptr; break; // not reachable from the enclosing case labels
- }
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_CURVE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_SUBDIVISION:
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
- createSubdivMeshTy createSubdivMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX(device->enabled_cpu_features,createSubdivMesh); // narrower macro than the other cases; see the FIXME on the next line
- //SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createSubdivMesh); // FIXME: this does not work for some reason?
- Geometry* geom = createSubdivMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_SUBDIVISION is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_USER:
- {
-#if defined(EMBREE_GEOMETRY_USER)
- createUserGeometryTy createUserGeometry = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createUserGeometry);
- Geometry* geom = createUserGeometry(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_USER is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_INSTANCE:
- {
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- createInstanceTy createInstance = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createInstance);
- Geometry* geom = createInstance(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_INSTANCE is not supported");
-#endif
- }
-
- case RTC_GEOMETRY_TYPE_GRID:
- {
-#if defined(EMBREE_GEOMETRY_GRID)
- createGridMeshTy createGridMesh = nullptr;
- SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createGridMesh);
- Geometry* geom = createGridMesh(device);
- return (RTCGeometry) geom->refInc();
-#else
- throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_GRID is not supported");
-#endif
- }
-
- default:
- throw_RTCError(RTC_ERROR_UNKNOWN,"invalid geometry type");
- }
-
- RTC_CATCH_END(device);
- return nullptr;
- }
-
- RTC_API void rtcSetGeometryUserPrimitiveCount(RTCGeometry hgeometry, unsigned int userPrimitiveCount)
- { // Sets the primitive count of a user geometry; any other geometry type is rejected.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryUserPrimitiveCount);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (unlikely(geometry->getType() != Geometry::GTY_USER_GEOMETRY))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation only allowed for user geometries");
-
- geometry->setNumPrimitives(userPrimitiveCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTimeStepCount(RTCGeometry hgeometry, unsigned int timeStepCount)
- { // Sets the geometry's number of time steps; counts above RTC_MAX_TIME_STEP_COUNT are rejected.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTimeStepCount);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (timeStepCount > RTC_MAX_TIME_STEP_COUNT)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"number of time steps is out of range");
-
- geometry->setNumTimeSteps(timeStepCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTimeRange(RTCGeometry hgeometry, float startTime, float endTime)
- { // Sets the geometry's time range to [startTime, endTime]; startTime > endTime is rejected.
- Ref<Geometry> geometry = (Geometry*) hgeometry; // held through a Ref<> here, unlike the raw pointer used by most sibling setters
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTimeRange);
- RTC_VERIFY_HANDLE(hgeometry);
-
- if (startTime > endTime)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"startTime has to be smaller or equal to the endTime");
-
- geometry->setTimeRange(BBox1f(startTime,endTime));
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryVertexAttributeCount(RTCGeometry hgeometry, unsigned int N)
- { // Forwards N to the geometry's setVertexAttributeCount().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryVertexAttributeCount);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setVertexAttributeCount(N);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTopologyCount(RTCGeometry hgeometry, unsigned int N)
- { // Forwards N to the geometry's setTopologyCount().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTopologyCount);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setTopologyCount(N);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryBuildQuality (RTCGeometry hgeometry, RTCBuildQuality quality)
- { // Sets the geometry's build quality; a value outside LOW/MEDIUM/HIGH/REFIT aborts the process.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBuildQuality);
- RTC_VERIFY_HANDLE(hgeometry);
- if (quality != RTC_BUILD_QUALITY_LOW &&
- quality != RTC_BUILD_QUALITY_MEDIUM &&
- quality != RTC_BUILD_QUALITY_HIGH &&
- quality != RTC_BUILD_QUALITY_REFIT) // unbraced if: the abort() below is its only controlled statement
- // -- GODOT start --
- // throw std::runtime_error("invalid build quality");
- abort();
- // -- GODOT end --
- geometry->setBuildQuality(quality);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryMaxRadiusScale(RTCGeometry hgeometry, float maxRadiusScale)
- { // Sets the geometry's maximal radius scale; only available when RTC_MIN_WIDTH is enabled.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryMaxRadiusScale);
- RTC_VERIFY_HANDLE(hgeometry);
-#if RTC_MIN_WIDTH
- if (maxRadiusScale < 1.0f) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximal radius scale has to be larger or equal to 1");
- geometry->setMaxRadiusScale(maxRadiusScale);
-#else
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"min-width feature is not enabled");
-#endif
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryMask (RTCGeometry hgeometry, unsigned int mask)
- { // Forwards `mask` to the geometry's setMask().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryMask);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setMask(mask);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometrySubdivisionMode (RTCGeometry hgeometry, unsigned topologyID, RTCSubdivisionMode mode)
- { // Forwards the subdivision `mode` for topology `topologyID` to the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometrySubdivisionMode);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setSubdivisionMode(topologyID,mode);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryVertexAttributeTopology(RTCGeometry hgeometry, unsigned int vertexAttributeID, unsigned int topologyID)
- { // Forwards the (vertexAttributeID, topologyID) pair to the geometry's setVertexAttributeTopology().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryVertexAttributeTopology);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setVertexAttributeTopology(vertexAttributeID, topologyID);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, RTCBuffer hbuffer, size_t byteOffset, size_t byteStride, size_t itemCount)
- { // Binds an existing buffer object to the given buffer type/slot of the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- Ref<Buffer> buffer = (Buffer*)hbuffer;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_HANDLE(hbuffer);
- // geometry and buffer must come from the same device
- if (geometry->device != buffer->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
- // itemCount is narrowed to unsigned int below, so it has to fit into 32 bits
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
-
- geometry->setBuffer(type, slot, format, buffer, byteOffset, byteStride, (unsigned int)itemCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount)
- { // Binds caller-provided memory at `ptr` to the given buffer type/slot of the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetSharedGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- // itemCount is narrowed to unsigned int below, so it has to fit into 32 bits
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
- // byteOffset is applied to the pointer here, so setBuffer() is called with offset 0
- Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset);
- geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount)
- { // Creates a new buffer, binds it to the given type/slot and returns its data pointer; returns nullptr after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetNewGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- // itemCount is narrowed to unsigned int below, so it has to fit into 32 bits
- if (itemCount > 0xFFFFFFFFu)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
-
- /* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */
- size_t bytes = itemCount*byteStride;
- if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- bytes += (16 - (byteStride%16))%16; // adds the distance from byteStride to the next multiple of 16 (0 if already a multiple)
-
- Ref<Buffer> buffer = new Buffer(geometry->device, bytes);
- geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
- return buffer->data();
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
- { // Returns what the geometry's getBuffer(type, slot) yields; returns nullptr after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryBufferData);
- RTC_VERIFY_HANDLE(hgeometry);
- return geometry->getBuffer(type, slot);
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void rtcEnableGeometry (RTCGeometry hgeometry)
- { // Calls enable() on the geometry behind `hgeometry`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcEnableGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->enable();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
- { // Calls updateBuffer(type, slot) on the geometry behind `hgeometry`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcUpdateGeometryBuffer);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->updateBuffer(type, slot);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcDisableGeometry (RTCGeometry hgeometry)
- { // Calls disable() on the geometry behind `hgeometry`.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcDisableGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->disable();
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryTessellationRate (RTCGeometry hgeometry, float tessellationRate)
- { // Forwards `tessellationRate` to the geometry's setTessellationRate().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryTessellationRate);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setTessellationRate(tessellationRate);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryUserData (RTCGeometry hgeometry, void* ptr)
- { // Stores the user pointer `ptr` on the geometry via setUserData().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryUserData);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setUserData(ptr);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void* rtcGetGeometryUserData (RTCGeometry hgeometry)
- { // Returns the geometry's getUserData() value; returns nullptr after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry; // no ref counting here!
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryUserData);
- RTC_VERIFY_HANDLE(hgeometry);
- return geometry->getUserData();
- RTC_CATCH_END2(geometry);
- return nullptr;
- }
-
- RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr)
- { // Registers the `bounds` callback together with `userPtr` on the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryBoundsFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setBoundsFunction(bounds,userPtr);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryDisplacementFunction (RTCGeometry hgeometry, RTCDisplacementFunctionN displacement)
- { // Registers the `displacement` callback on the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryDisplacementFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setDisplacementFunction(displacement);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryIntersectFunction (RTCGeometry hgeometry, RTCIntersectFunctionN intersect)
- { // Registers the `intersect` callback on the geometry via setIntersectFunctionN().
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryIntersectFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setIntersectFunctionN(intersect);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API void rtcSetGeometryPointQueryFunction(RTCGeometry hgeometry, RTCPointQueryFunction pointQuery)
- { // Registers the `pointQuery` callback on the geometry.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryPointQueryFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setPointQueryFunction(pointQuery);
- RTC_CATCH_END2(geometry);
- }
-
- RTC_API unsigned int rtcGetGeometryFirstHalfEdge(RTCGeometry hgeometry, unsigned int faceID)
- { // Returns the geometry's getFirstHalfEdge(faceID); returns -1 (converted to unsigned int) after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryFirstHalfEdge);
- RTC_VERIFY_HANDLE(hgeometry); // reject a null handle before it is dereferenced, as the sibling geometry functions do
- return geometry->getFirstHalfEdge(faceID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryFace(RTCGeometry hgeometry, unsigned int edgeID)
- { // Returns the geometry's getFace(edgeID); returns -1 (converted to unsigned int) after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryFace);
- RTC_VERIFY_HANDLE(hgeometry); // reject a null handle before it is dereferenced, as the sibling geometry functions do
- return geometry->getFace(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryNextHalfEdge(RTCGeometry hgeometry, unsigned int edgeID)
- { // Returns the geometry's getNextHalfEdge(edgeID); returns -1 (converted to unsigned int) after the catch macro.
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryNextHalfEdge);
- RTC_VERIFY_HANDLE(hgeometry); // reject a null handle before it is dereferenced, as the sibling geometry functions do
- return geometry->getNextHalfEdge(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryPreviousHalfEdge(RTCGeometry hgeometry, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryPreviousHalfEdge);
- return geometry->getPreviousHalfEdge(edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
- RTC_API unsigned int rtcGetGeometryOppositeHalfEdge(RTCGeometry hgeometry, unsigned int topologyID, unsigned int edgeID)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometryOppositeHalfEdge);
- return geometry->getOppositeHalfEdge(topologyID,edgeID);
- RTC_CATCH_END2(geometry);
- return -1;
- }
-
-  /*! Installs an occluded callback on a geometry: rejects a null handle via
-      RTC_VERIFY_HANDLE, then forwards to Geometry::setOccludedFunctionN. */
-  RTC_API void rtcSetGeometryOccludedFunction (RTCGeometry hgeometry, RTCOccludedFunctionN occluded)
-  {
-    Geometry* geometry = (Geometry*) hgeometry;
-    RTC_CATCH_BEGIN;
-    /* trace under the public entry-point name so API traces match the call the user made */
-    RTC_TRACE(rtcSetGeometryOccludedFunction);
-    RTC_VERIFY_HANDLE(hgeometry);
-    geometry->setOccludedFunctionN(occluded);
-    RTC_CATCH_END2(geometry);
-  }
-
- /*! Installs an intersection filter callback on a geometry: rejects a null handle
-     via RTC_VERIFY_HANDLE, then forwards to Geometry::setIntersectionFilterFunctionN. */
- RTC_API void rtcSetGeometryIntersectFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryIntersectFilterFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setIntersectionFilterFunctionN(filter);
- RTC_CATCH_END2(geometry);
- }
-
- /*! Installs an occlusion filter callback on a geometry: rejects a null handle
-     via RTC_VERIFY_HANDLE, then forwards to Geometry::setOcclusionFilterFunctionN. */
- RTC_API void rtcSetGeometryOccludedFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcSetGeometryOccludedFilterFunction);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->setOcclusionFilterFunctionN(filter);
- RTC_CATCH_END2(geometry);
- }
-
- /*! Forwards the interpolation request to Geometry::interpolate on args->geometry.
-     The geometry handle is null-checked in DEBUG builds only. Note that args itself
-     is dereferenced on the first line, before any check, so a null args is not caught here. */
- RTC_API void rtcInterpolate(const RTCInterpolateArguments* const args)
- {
- Geometry* geometry = (Geometry*) args->geometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcInterpolate);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(args->geometry);
-#endif
- geometry->interpolate(args);
- RTC_CATCH_END2(geometry);
- }
-
- /*! Forwards the interpolation request to Geometry::interpolateN on args->geometry.
-     The geometry handle is null-checked in DEBUG builds only. Note that args itself
-     is dereferenced on the first line, before any check, so a null args is not caught here. */
- RTC_API void rtcInterpolateN(const RTCInterpolateNArguments* const args)
- {
- Geometry* geometry = (Geometry*) args->geometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcInterpolateN);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(args->geometry);
-#endif
- geometry->interpolateN(args);
- RTC_CATCH_END2(geometry);
- }
-
-  /*! Commits a geometry: rejects a null handle via RTC_VERIFY_HANDLE, then calls Geometry::commit. */
-  RTC_API void rtcCommitGeometry (RTCGeometry hgeometry)
-  {
-    Geometry* geometry = (Geometry*) hgeometry;
-    RTC_CATCH_BEGIN;
-    RTC_TRACE(rtcCommitGeometry);
-    RTC_VERIFY_HANDLE(hgeometry);
-    /* plain call: this function returns void, so there is no value to hand back */
-    geometry->commit();
-    RTC_CATCH_END2(geometry);
-  }
-
- /*! Attaches a geometry to a scene and returns the value of
-     scene->bind(RTC_INVALID_GEOMETRY_ID,geometry). Both handles must be non-null
-     and must belong to the same device, otherwise an RTC_ERROR_INVALID_ARGUMENT
-     error is raised. */
- RTC_API unsigned int rtcAttachGeometry (RTCScene hscene, RTCGeometry hgeometry)
- {
- Scene* scene = (Scene*) hscene;
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcAttachGeometry);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(hgeometry);
- if (scene->device != geometry->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
- return scene->bind(RTC_INVALID_GEOMETRY_ID,geometry);
- RTC_CATCH_END2(scene);
- /* fallback after the catch macro; -1 converts to the largest unsigned int */
- return -1;
- }
-
- /*! Attaches a geometry to a scene under a caller-chosen ID via scene->bind(geomID,geometry).
-     Rejects null handles, geomID == RTC_INVALID_GEOMETRY_ID, and handles that
-     belong to different devices. */
- RTC_API void rtcAttachGeometryByID (RTCScene hscene, RTCGeometry hgeometry, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcAttachGeometryByID);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_HANDLE(hgeometry);
- RTC_VERIFY_GEOMID(geomID);
- if (scene->device != geometry->device)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices");
- scene->bind(geomID,geometry);
- RTC_CATCH_END2(scene);
- }
-
- /*! Detaches the geometry with the given ID from a scene via Scene::detachGeometry.
-     Rejects a null scene handle and geomID == RTC_INVALID_GEOMETRY_ID. */
- RTC_API void rtcDetachGeometry (RTCScene hscene, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcDetachGeometry);
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_GEOMID(geomID);
- scene->detachGeometry(geomID);
- RTC_CATCH_END2(scene);
- }
-
- /*! Takes an additional reference on a geometry (Geometry::refInc) after rejecting a null handle. */
- RTC_API void rtcRetainGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->refInc();
- RTC_CATCH_END2(geometry);
- }
-
- /*! Drops one reference on a geometry (Geometry::refDec) after rejecting a null handle.
-     NOTE(review): geometry is still passed to RTC_CATCH_END2 after refDec; whether it
-     can already be destroyed at that point depends on RefCount — confirm. */
- RTC_API void rtcReleaseGeometry (RTCGeometry hgeometry)
- {
- Geometry* geometry = (Geometry*) hgeometry;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseGeometry);
- RTC_VERIFY_HANDLE(hgeometry);
- geometry->refDec();
- RTC_CATCH_END2(geometry);
- }
-
- /*! Returns scene->get(geomID) cast to an RTCGeometry handle.
-     The scene handle and geomID are validated in DEBUG builds only;
-     nullptr is the fallback return after the catch macro. */
- RTC_API RTCGeometry rtcGetGeometry (RTCScene hscene, unsigned int geomID)
- {
- Scene* scene = (Scene*) hscene;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcGetGeometry);
-#if defined(DEBUG)
- RTC_VERIFY_HANDLE(hscene);
- RTC_VERIFY_GEOMID(geomID);
-#endif
- return (RTCGeometry) scene->get(geomID);
- RTC_CATCH_END2(scene);
- return nullptr;
- }
-
-RTC_NAMESPACE_END
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.h b/thirdparty/embree-aarch64/kernels/common/rtcore.h
deleted file mode 100644
index 4b070e122b..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../../include/embree3/rtcore.h"
-RTC_NAMESPACE_USE
-
-namespace embree
-{
- /*! decoding of intersection flags */
-  /*! true when the coherent bit of the context flags is set */
-  __forceinline bool isCoherent (RTCIntersectContextFlags flags)
-  {
-    const auto coherentBits = flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT;
-    return coherentBits == RTC_INTERSECT_CONTEXT_FLAG_COHERENT;
-  }
-  /*! true when masking the flags with the coherent bit yields RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT */
-  __forceinline bool isIncoherent(RTCIntersectContextFlags flags)
-  {
-    const auto coherentBits = flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT;
-    return coherentBits == RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT;
-  }
-
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8)
-# define USE_TASK_ARENA 1
-#else
-# define USE_TASK_ARENA 0
-#endif
-
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9
-# define TASKING_TBB_USE_TASK_ISOLATION 1
-#else
-# define TASKING_TBB_USE_TASK_ISOLATION 0
-#endif
-
-/*! Macros used in the rtcore API implementation */
-// -- GODOT start --
-// #define RTC_CATCH_BEGIN try {
-#define RTC_CATCH_BEGIN
-
-// #define RTC_CATCH_END(device) \
-// } catch (std::bad_alloc&) { \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
-#define RTC_CATCH_END(device)
-
-// #define RTC_CATCH_END2(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
-#define RTC_CATCH_END2(scene)
-
-// #define RTC_CATCH_END2_FALSE(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// return false; \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// return false; \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// return false; \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// return false; \
-// }
-#define RTC_CATCH_END2_FALSE(scene) return false;
-// -- GODOT end --
-
-#define RTC_VERIFY_HANDLE(handle) \
- if (handle == nullptr) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_GEOMID(id) \
- if (id == RTC_INVALID_GEOMETRY_ID) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_UPPER(id,upper) \
- if (id > upper) { \
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
- }
-
-#define RTC_VERIFY_RANGE(id,lower,upper) \
- if (id < lower || id > upper) \
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds");
-
-#if 0 // enable to debug print all API calls
-#define RTC_TRACE(x) std::cout << #x << std::endl;
-#else
-#define RTC_TRACE(x)
-#endif
-
-// -- GODOT begin --
-// /*! used to throw embree API errors */
-// struct rtcore_error : public std::exception
-// {
-// __forceinline rtcore_error(RTCError error, const std::string& str)
-// : error(error), str(str) {}
-//
-// ~rtcore_error() throw() {}
-//
-// const char* what () const throw () {
-// return str.c_str();
-// }
-//
-// RTCError error;
-// std::string str;
-// };
-// -- GODOT end --
-
-#if defined(DEBUG) // only report file and line in debug mode
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
- #define throw_RTCError(error,str) \
- printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort();
- // -- GODOT end --
-#else
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,str);
- #define throw_RTCError(error,str) \
- abort();
- // -- GODOT end --
-#endif
-
-/*! True when 'member' lies completely inside the first settings.byteSize bytes of
-    RTCBuildArguments, i.e. the caller's version of the struct contains that member.
-    A member that ends exactly at byteSize is fully present, hence '>=' rather than '>'. */
-#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
- ((settings).byteSize >= (offsetof(RTCBuildArguments,member)+sizeof((settings).member)))
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp
deleted file mode 100644
index 6bb96bba07..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp
+++ /dev/null
@@ -1,442 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#define RTC_EXPORT_API
-
-#include "default.h"
-#include "device.h"
-#include "scene.h"
-#include "context.h"
-#include "alloc.h"
-
-#include "../builders/bvh_builder_sah.h"
-#include "../builders/bvh_builder_morton.h"
-
-namespace embree
-{
- namespace isa // FIXME: support more ISAs for builders
- {
- /*! Reference-counted object behind an RTCBVH handle (see rtcNewBVH in this file).
-     Holds the owning device, a FastAllocator, and two scratch arrays used by the
-     Morton builder. The constructor takes a reference on the device and the
-     destructor drops it again. */
- struct BVH : public RefCount
- {
- /* device is dereferenced unconditionally, so it must not be null */
- BVH (Device* device)
- : device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0)
- {
- device->refInc();
- }
-
- ~BVH() {
- device->refDec();
- }
-
- public:
- Device* device;
- FastAllocator allocator;
- mvector<BVHBuilderMorton::BuildPrim> morton_src;
- mvector<BVHBuilderMorton::BuildPrim> morton_tmp;
- };
-
- /*! Builds a BVH with BVHBuilderMorton, driving the user callbacks stored in 'arguments'.
-     Steps: size the two Morton scratch arrays of the BVH object to primitiveCount,
-     reduce the centroid bounds of all primitives, generate Morton codes into
-     morton_src, run the builder, then call allocator.cleanup() and return the root
-     node pointer produced by the callbacks.
-     No argument validation happens here; rtcBuildBVH in this file validates before dispatching. */
- void* rtcBuildBVHMorton(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims_i = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- /* number of primitives reported so far to the progress callback */
- std::atomic<size_t> progress(0);
-
- /* initialize temporary arrays for morton builder */
- /* NOTE(review): the user primitive array is reinterpreted as PrimRef; this relies on
-    both types sharing one layout — confirm */
- PrimRef* prims = (PrimRef*) prims_i;
- mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src;
- mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp;
- morton_src.resize(primitiveCount);
- morton_tmp.resize(primitiveCount);
-
- /* compute centroid bounds */
- const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa {
-
- BBox3fa bounds(empty);
- for (size_t i=r.begin(); i<r.end(); i++)
- bounds.extend(prims[i].bounds().center2());
- return bounds;
- }, BBox3fa::merge);
-
- /* compute morton codes */
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) {
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]);
- for (size_t i=r.begin(); i<r.end(); i++) {
- generator(prims[i].bounds(),(unsigned) i);
- }
- });
-
- /* start morton build */
- std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that allocates BVH nodes */
- [&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* {
- return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- },
-
- /* lambda function that sets bounds */
- [&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa>
- {
- BBox3fa bounds = empty;
- void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
- const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) {
- bounds.extend(children[i].second);
- childptrs[i] = children[i].first;
- cbounds[i] = (const RTCBounds*)&children[i].second;
- }
- setNodeBounds(node,cbounds,(unsigned int)N,userPtr);
- setNodeChildren(node,childptrs, (unsigned int)N,userPtr);
- return std::make_pair(node,bounds);
- },
-
- /* lambda function that creates BVH leaves */
- [&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa>
- {
- /* assumes current.size() never exceeds RTC_BUILD_MAX_PRIMITIVES_PER_LEAF — TODO confirm against the builder settings */
- RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF];
- BBox3fa bounds = empty;
- for (size_t i=0;i<current.size();i++)
- {
- /* map the sorted Morton entry back to the index of the user primitive */
- const size_t id = morton_src[current.begin()+i].index;
- bounds.extend(prims[id].bounds());
- localBuildPrims[i] = prims_i[id];
- }
- void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr);
- return std::make_pair(node,bounds);
- },
-
- /* lambda that calculates the bounds for some primitive */
- [&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa {
- return prims[morton.index].bounds();
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- /* without a user callback, always report "continue" */
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- /* fraction of processed primitives, clamped to 1.0 */
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- morton_src.data(),morton_tmp.data(),primitiveCount,
- *arguments);
-
- bvh->allocator.cleanup();
- return root.first;
- }
-
- /*! Builds a BVH with BVHBuilderBinnedSAH, driving the user callbacks stored in 'arguments'.
-     Steps: reduce the centroid/geometry bounds of all primitives into a PrimInfo,
-     run the builder, then call allocator.cleanup() and return the root node pointer.
-     No argument validation happens here; rtcBuildBVH in this file validates before dispatching. */
- void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- /* number of primitives reported so far to the progress callback */
- std::atomic<size_t> progress(0);
-
- /* calculate priminfo */
- auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
- {
- CentGeomBBox3fa bounds(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- /* NOTE(review): reinterprets RTCBuildPrimitive as BBox3fa; relies on matching layout — confirm */
- bounds.extend((BBox3fa&)prims[j]);
- return bounds;
- };
- const CentGeomBBox3fa bounds =
- parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
-
- const PrimInfo pinfo(0,primitiveCount,bounds);
-
- /* build BVH */
- void* root = BVHBuilderBinnedSAH::build<void*>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that creates BVH nodes */
- [&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
- {
- void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
- setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that updates BVH nodes */
- /* precord and crecords are not used; only the child pointers are handed to the user */
- [&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
- setNodeChildren(node,children, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that creates BVH leaves */
- [&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
- return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- /* without a user callback, always report "continue" */
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- /* fraction of processed primitives, clamped to 1.0 */
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- (PrimRef*)prims,pinfo,*arguments);
-
- bvh->allocator.cleanup();
- return root;
- }
-
-    /*! Reduction operator for (bounds, largest geomID) pairs: the bounds are combined
-        with CentGeomBBox3fa::merge2 and the larger of the two geomIDs is kept. */
-    static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b)
-    {
-      typedef std::pair<CentGeomBBox3fa,unsigned int> BoundsAndGeomID;
-      return BoundsAndGeomID(CentGeomBBox3fa::merge2(a.first,b.first), max(a.second,b.second));
-    }
-
- /*! Builds a BVH with BVHBuilderBinnedFastSpatialSAH, driving the user callbacks stored
-     in 'arguments', including the splitPrimitive callback.
-     Steps: reduce the bounds and the largest geomID over all primitives; if that geomID
-     does not fit below 1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS), fall back to
-     rtcBuildBVHBinnedSAH; otherwise run the spatial-split builder, call
-     allocator.cleanup() and return the root node pointer.
-     No argument validation happens here; rtcBuildBVH in this file validates before dispatching. */
- void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments)
- {
- BVH* bvh = (BVH*) arguments->bvh;
- RTCBuildPrimitive* prims = arguments->primitives;
- size_t primitiveCount = arguments->primitiveCount;
- RTCCreateNodeFunction createNode = arguments->createNode;
- RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
- RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
- RTCCreateLeafFunction createLeaf = arguments->createLeaf;
- RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive;
- RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
- void* userPtr = arguments->userPtr;
-
- /* number of primitives reported so far to the progress callback */
- std::atomic<size_t> progress(0);
-
- /* calculate priminfo */
-
- auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int>
- {
- CentGeomBBox3fa bounds(empty);
- unsigned maxGeomID = 0;
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- /* NOTE(review): reinterprets RTCBuildPrimitive as BBox3fa; relies on matching layout — confirm */
- bounds.extend((BBox3fa&)prims[j]);
- maxGeomID = max(maxGeomID,prims[j].geomID);
- }
- return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID);
- };
-
-
- const std::pair<CentGeomBBox3fa,unsigned int> pair =
- parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair);
-
- CentGeomBBox3fa bounds = pair.first;
- const unsigned int maxGeomID = pair.second;
-
- if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))))
- {
- /* fallback code for max geomID larger than threshold */
- return rtcBuildBVHBinnedSAH(arguments);
- }
-
- const PrimInfo pinfo(0,primitiveCount,bounds);
-
- /* function that splits a build primitive */
- struct Splitter
- {
- Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr)
- : splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {}
-
- /* splits a PrimRef: masks its geomID with GEOMID_MASK in place before the user callback
-    sees it, then stamps the stored geomID/primID onto both halves */
- __forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const
- {
- prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK;
- splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
- left_o.geomIDref() = geomID; left_o.primIDref() = primID;
- right_o.geomIDref() = geomID; right_o.primIDref() = primID;
- }
-
- /* splits a bare box: wraps it in a temporary PrimRef carrying the masked geomID and the primID */
- __forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const
- {
- PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID);
- splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
- }
-
- RTCSplitPrimitiveFunction splitPrimitive;
- unsigned geomID;
- unsigned primID;
- void* userPtr;
- };
-
- /* build BVH */
- void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>(
-
- /* thread local allocator for fast allocations */
- [&] () -> FastAllocator::CachedAllocator {
- return bvh->allocator.getCachedAllocator();
- },
-
- /* lambda function that creates BVH nodes */
- [&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
- {
- void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
- const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
- setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that updates BVH nodes */
- /* precord and crecords are not used; only the child pointers are handed to the user */
- [&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
- setNodeChildren(node,children, (unsigned int)N,userPtr);
- return node;
- },
-
- /* lambda function that creates BVH leaves */
- [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
- return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
- },
-
- /* returns the splitter */
- [&] ( const PrimRef& prim ) -> Splitter {
- return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr);
- },
-
- /* progress monitor function */
- [&] (size_t dn) {
- /* without a user callback, always report "continue" */
- if (!buildProgress) return true;
- const size_t n = progress.fetch_add(dn)+dn;
- /* fraction of processed primitives, clamped to 1.0 */
- const double f = std::min(1.0,double(n)/double(primitiveCount));
- return buildProgress(userPtr,f);
- },
-
- (PrimRef*)prims,
- arguments->primitiveArrayCapacity,
- pinfo,*arguments);
-
- bvh->allocator.cleanup();
- return root;
- }
- }
-}
-
-using namespace embree;
-using namespace embree::isa;
-
-RTC_NAMESPACE_BEGIN
-
-  /*! Creates a BVH object for the given device and returns it as an RTCBVH handle.
-      The returned pointer is the result of refInc() on the new object. A null
-      device handle is rejected; nullptr is the fallback return after the catch macro. */
-  RTC_API RTCBVH rtcNewBVH(RTCDevice device)
-  {
-    RTC_CATCH_BEGIN;
-    /* trace under the public entry-point name so API traces match the call the user made */
-    RTC_TRACE(rtcNewBVH);
-    RTC_VERIFY_HANDLE(device);
-    BVH* bvh = new BVH((Device*)device);
-    return (RTCBVH) bvh->refInc();
-    RTC_CATCH_END((Device*)device);
-    return nullptr;
-  }
-
-  /*! Validates the build arguments, prepares the BVH allocator and dispatches to the
-      builder selected by arguments->buildQuality:
-        RTC_BUILD_QUALITY_LOW    -> rtcBuildBVHMorton
-        RTC_BUILD_QUALITY_MEDIUM -> rtcBuildBVHBinnedSAH
-        RTC_BUILD_QUALITY_HIGH   -> rtcBuildBVHSpatialSAH when a splitPrimitive callback is
-                                    set and primitiveArrayCapacity > primitiveCount,
-                                    otherwise rtcBuildBVHBinnedSAH
-      Any other quality raises RTC_ERROR_INVALID_OPERATION.
-      Returns the root node pointer produced by the builder; nullptr is the fallback
-      return after the catch macro. Unless RTC_BUILD_FLAG_DYNAMIC is set, the Morton
-      scratch arrays of the BVH object are cleared once the build has finished. */
-  RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments)
-  {
-    /* resolved before the try block and null-safe, so the error path never
-       dereferences an invalid 'arguments' or 'bvh' */
-    BVH* bvh = arguments ? (BVH*) arguments->bvh : nullptr;
-    Device* device = bvh ? bvh->device : nullptr;
-    RTC_CATCH_BEGIN;
-    RTC_TRACE(rtcBuildBVH);
-    RTC_VERIFY_HANDLE(arguments);
-    RTC_VERIFY_HANDLE(bvh);
-    RTC_VERIFY_HANDLE(arguments->createNode);
-    RTC_VERIFY_HANDLE(arguments->setNodeChildren);
-    RTC_VERIFY_HANDLE(arguments->setNodeBounds);
-    RTC_VERIFY_HANDLE(arguments->createLeaf);
-
-    if (arguments->primitiveArrayCapacity < arguments->primitiveCount) {
-      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount")
-    }
-
-    /* initialize the allocator */
-    bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
-    bvh->allocator.reset();
-
-    /* switch between different builders based on quality level */
-    void* root = nullptr;
-    if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
-      root = rtcBuildBVHMorton(arguments);
-    else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
-      root = rtcBuildBVHBinnedSAH(arguments);
-    else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) {
-      if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount)
-        root = rtcBuildBVHBinnedSAH(arguments);
-      else
-        root = rtcBuildBVHSpatialSAH(arguments);
-    }
-    else {
-      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality");
-    }
-
-    /* if we are in dynamic mode, then do not clear temporary data.
-       This runs after the build: returning straight from the dispatch above
-       would make the cleanup unreachable. */
-    if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC))
-    {
-      bvh->morton_src.clear();
-      bvh->morton_tmp.clear();
-    }
-
-    return root;
-    RTC_CATCH_END(device);
-    return nullptr;
-  }
-
- /*! Allocates 'bytes' bytes with alignment 'align' by calling malloc0 on the
-     FastAllocator::CachedAllocator behind the handle. The handle is not null-checked
-     here; nullptr is the fallback return after the catch macro. */
- RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align)
- {
- FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcThreadLocalAlloc);
- return alloc->malloc0(bytes,align);
- RTC_CATCH_END(alloc->alloc->getDevice());
- return nullptr;
- }
-
-  /*! Clears the two Morton scratch arrays of the BVH object after rejecting a null handle. */
-  RTC_API void rtcMakeStaticBVH(RTCBVH hbvh)
-  {
-    BVH* bvh = (BVH*) hbvh;
-    /* resolve the device up front (as rtcRetainBVH/rtcReleaseBVH do) so the error
-       path never dereferences a null handle */
-    Device* device = bvh ? bvh->device : nullptr;
-    RTC_CATCH_BEGIN;
-    /* trace under the public entry-point name so API traces match the call the user made */
-    RTC_TRACE(rtcMakeStaticBVH);
-    RTC_VERIFY_HANDLE(hbvh);
-    bvh->morton_src.clear();
-    bvh->morton_tmp.clear();
-    RTC_CATCH_END(device);
-  }
-
- /*! Takes an additional reference on a BVH object (refInc) after rejecting a null handle. */
- RTC_API void rtcRetainBVH(RTCBVH hbvh)
- {
- BVH* bvh = (BVH*) hbvh;
- /* resolved before the call so the catch macro does not have to touch bvh */
- Device* device = bvh ? bvh->device : nullptr;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcRetainBVH);
- RTC_VERIFY_HANDLE(hbvh);
- bvh->refInc();
- RTC_CATCH_END(device);
- }
-
- /*! Drops one reference on a BVH object (refDec) after rejecting a null handle. */
- RTC_API void rtcReleaseBVH(RTCBVH hbvh)
- {
- BVH* bvh = (BVH*) hbvh;
- /* resolved before refDec so the catch macro does not have to touch bvh afterwards */
- Device* device = bvh ? bvh->device : nullptr;
- RTC_CATCH_BEGIN;
- RTC_TRACE(rtcReleaseBVH);
- RTC_VERIFY_HANDLE(hbvh);
- bvh->refDec();
- RTC_CATCH_END(device);
- }
-
-RTC_NAMESPACE_END
diff --git a/thirdparty/embree-aarch64/kernels/common/scene.cpp b/thirdparty/embree-aarch64/kernels/common/scene.cpp
deleted file mode 100644
index 1e23aeb415..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene.cpp
+++ /dev/null
@@ -1,976 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "scene.h"
-
-#include "../bvh/bvh4_factory.h"
-#include "../bvh/bvh8_factory.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- /* error raising rtcIntersect and rtcOccluded functions */
- void missing_rtcCommit() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); }
- void invalid_rtcIntersect1() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect and rtcOccluded not enabled"); }
- void invalid_rtcIntersect4() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect4 and rtcOccluded4 not enabled"); }
- void invalid_rtcIntersect8() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect8 and rtcOccluded8 not enabled"); }
- void invalid_rtcIntersect16() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect16 and rtcOccluded16 not enabled"); }
- void invalid_rtcIntersectN() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectN and rtcOccludedN not enabled"); }
-
- /* Builds an empty scene attached to `device`: takes a device reference,
-    installs intersectors constructed from missing_rtcCommit, and starts with
-    medium build quality and no scene flags unless the device overrides them. */
- Scene::Scene (Device* device)
-   : device(device),
-     flags_modified(true),
-     enabled_geometry_types(0),
-     scene_flags(RTC_SCENE_FLAG_NONE),
-     quality_flags(RTC_BUILD_QUALITY_MEDIUM),
-     is_build(false),
-     modified(true),
-     progressInterface(this),
-     progress_monitor_function(nullptr),
-     progress_monitor_ptr(nullptr),
-     progress_monitor_counter(0)
- {
-   device->refInc();
-
-   intersectors = Accel::Intersectors(missing_rtcCommit);
-
-   /* a device value other than -1 replaces the default (debugging aid) */
-   const bool overrideQuality = (device->quality_flags != -1);
-   if (overrideQuality) {
-     quality_flags = (RTCBuildQuality) device->quality_flags;
-   }
-   const bool overrideFlags = (device->scene_flags != -1);
-   if (overrideFlags) {
-     scene_flags = (RTCSceneFlags) device->scene_flags;
-   }
- }
-
- /* Gives back the device reference taken in the constructor. */
- Scene::~Scene() noexcept {
-   device->refDec();
- }
-
- /* Writes a table to std::cout with one row per named geometry type and one
-    column per time-segment count; each cell is the summed size() of the
-    geometries that fall into it. */
- void Scene::printStatistics()
- {
-   /* number of columns: the largest numTimeSteps found on any geometry */
-   unsigned numColumns = 0;
-   for (size_t geomID=0; geomID<size(); geomID++) {
-     if (get(geomID))
-       numColumns = max(numColumns,get(geomID)->numTimeSteps);
-   }
-
-   /* one zero-filled row of counters per geometry type */
-   std::vector<size_t> table[Geometry::GTY_END];
-   for (auto& row : table)
-     row.resize(numColumns);
-
-   /* accumulate sizes by (type, time segments) */
-   for (size_t geomID=0; geomID<size(); geomID++)
-   {
-     if (!get(geomID)) continue;
-     int type = get(geomID)->getType();
-     assert(type<Geometry::GTY_END);
-     int segments = get(geomID)->numTimeSegments();
-     assert((unsigned int)segments < numColumns);
-     table[type][segments] += get(geomID)->size();
-   }
-
-   /* header row */
-   std::cout << std::setw(23) << "segments" << ": ";
-   for (size_t col=0; col<numColumns; col++)
-     std::cout << std::setw(10) << col;
-   std::cout << std::endl;
-
-   /* separator row */
-   std::cout << "-------------------------";
-   for (size_t col=0; col<numColumns; col++)
-     std::cout << "----------";
-   std::cout << std::endl;
-
-   /* one row per geometry type that has a non-empty name */
-   for (size_t type=0; type<Geometry::GTY_END; type++)
-   {
-     const bool unnamed = (std::string(Geometry::gtype_names[type]) == "");
-     if (unnamed) continue;
-     std::cout << std::setw(23) << Geometry::gtype_names[type] << ": ";
-     for (size_t col=0; col<numColumns; col++)
-       std::cout << std::setw(10) << table[type][col];
-     std::cout << std::endl;
-   }
- }
-
- void Scene::createTriangleAccel() // Registers (via accels_add) the triangle acceleration structure named by device->tri_accel.
- { // The whole body is compiled out when EMBREE_GEOMETRY_TRIANGLE is not defined.
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- if (device->tri_accel == "default") // automatic choice from build quality, compact/robust mode and AVX availability
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW)
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); // bit 1 = isCompactAccel(), bit 0 = isRobustAccel()
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
-
- break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else /* dynamic */
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) { // compact modes (2 and 3) use the bvh4 factory even on this AVX path
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- }
- else if (device->tri_accel == "bvh4.triangle4") accels_add(device->bvh4_factory->BVH4Triangle4 (this)); // explicit names below bypass the automatic choice
- else if (device->tri_accel == "bvh4.triangle4v") accels_add(device->bvh4_factory->BVH4Triangle4v(this));
- else if (device->tri_accel == "bvh4.triangle4i") accels_add(device->bvh4_factory->BVH4Triangle4i(this));
- else if (device->tri_accel == "qbvh4.triangle4i") accels_add(device->bvh4_factory->BVH4QuantizedTriangle4i(this));
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->tri_accel == "bvh8.triangle4") accels_add(device->bvh8_factory->BVH8Triangle4 (this));
- else if (device->tri_accel == "bvh8.triangle4v") accels_add(device->bvh8_factory->BVH8Triangle4v(this));
- else if (device->tri_accel == "bvh8.triangle4i") accels_add(device->bvh8_factory->BVH8Triangle4i(this));
- else if (device->tri_accel == "qbvh8.triangle4i") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4i(this));
- else if (device->tri_accel == "qbvh8.triangle4") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown triangle acceleration structure "+device->tri_accel); // any other name is rejected
-#endif
- }
-
- void Scene::createTriangleMBAccel() // Registers (via accels_add) the motion-blur triangle acceleration structure named by device->tri_accel_mb.
- { // The whole body is compiled out when EMBREE_GEOMETRY_TRIANGLE is not defined.
-#if defined(EMBREE_GEOMETRY_TRIANGLE)
- if (device->tri_accel_mb == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); // bit 1 = isCompactAccel(), bit 0 = isRobustAccel()
-
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // BVH8 reduces performance on AVX only-machines
- {
- switch (mode) { // compact modes (2 and 3) still use the bvh4 factory
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- switch (mode) { // without AVX2 only the robust bit changes the result
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- else if (device->tri_accel_mb == "bvh4.triangle4imb") accels_add(device->bvh4_factory->BVH4Triangle4iMB(this)); // explicit names bypass the automatic choice
- else if (device->tri_accel_mb == "bvh4.triangle4vmb") accels_add(device->bvh4_factory->BVH4Triangle4vMB(this));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->tri_accel_mb == "bvh8.triangle4imb") accels_add(device->bvh8_factory->BVH8Triangle4iMB(this));
- else if (device->tri_accel_mb == "bvh8.triangle4vmb") accels_add(device->bvh8_factory->BVH8Triangle4vMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur triangle acceleration structure "+device->tri_accel_mb); // any other name is rejected
-#endif
- }
-
- void Scene::createQuadAccel() // Registers (via accels_add) the quad acceleration structure named by device->quad_accel.
- { // The whole body is compiled out when EMBREE_GEOMETRY_QUAD is not defined.
-#if defined(EMBREE_GEOMETRY_QUAD)
- if (device->quad_accel == "default") // automatic choice from build quality, compact/robust mode and AVX availability
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW)
- {
- /* static */
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); // bit 1 = isCompactAccel(), bit 0 = isRobustAccel()
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (quality_flags == RTC_BUILD_QUALITY_HIGH)
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST));
- else
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- }
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- break;
-
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else /* dynamic */
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) { // compact modes (2 and 3) use BVH4Quad4v here, whereas the static branch uses BVH4Quad4i
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel();
- switch (mode) { // only the robust bit changes the result on this path
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- }
- else if (device->quad_accel == "bvh4.quad4v") accels_add(device->bvh4_factory->BVH4Quad4v(this)); // explicit names below bypass the automatic choice
- else if (device->quad_accel == "bvh4.quad4i") accels_add(device->bvh4_factory->BVH4Quad4i(this));
- else if (device->quad_accel == "qbvh4.quad4i") accels_add(device->bvh4_factory->BVH4QuantizedQuad4i(this));
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->quad_accel == "bvh8.quad4v") accels_add(device->bvh8_factory->BVH8Quad4v(this));
- else if (device->quad_accel == "bvh8.quad4i") accels_add(device->bvh8_factory->BVH8Quad4i(this));
- else if (device->quad_accel == "qbvh8.quad4i") accels_add(device->bvh8_factory->BVH8QuantizedQuad4i(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad acceleration structure "+device->quad_accel); // any other name is rejected
-#endif
- }
-
- void Scene::createQuadMBAccel() // Registers (via accels_add) the motion-blur quad acceleration structure named by device->quad_accel_mb.
- { // The whole body is compiled out when EMBREE_GEOMETRY_QUAD is not defined.
-#if defined(EMBREE_GEOMETRY_QUAD)
- if (device->quad_accel_mb == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); // bit 1 = isCompactAccel(), bit 0 = isRobustAccel()
- switch (mode) {
- case /*0b00*/ 0:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- else // this else binds to the statement after #endif when SIMD8 is compiled in
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST));
- break;
-
- case /*0b01*/ 1:
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX())
- accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- else
-#endif
- accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST));
- break;
-
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else if (device->quad_accel_mb == "bvh4.quad4imb") accels_add(device->bvh4_factory->BVH4Quad4iMB(this)); // explicit names bypass the automatic choice
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->quad_accel_mb == "bvh8.quad4imb") accels_add(device->bvh8_factory->BVH8Quad4iMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad motion blur acceleration structure "+device->quad_accel_mb); // any other name is rejected
-#endif
- }
-
- void Scene::createHairAccel() // Registers (via accels_add) the curve/point acceleration structure named by device->hair_accel.
- { // The body is compiled in when either EMBREE_GEOMETRY_CURVE or EMBREE_GEOMETRY_POINT is defined.
-#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
- if (device->hair_accel == "default")
- {
- int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); // bit 1 = isCompactAccel(), bit 0 = isRobustAccel()
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // only enable on HSW machines, for SNB this codepath is slower
- {
- switch (mode) { // compact modes (2 and 3) use the bvh4 factory with the 8i curve variant
- case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- else
-#endif
- {
- switch (mode) {
- case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::ROBUST)); break;
- case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST)); break;
- case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::ROBUST)); break;
- }
- }
- }
- else if (device->hair_accel == "bvh4obb.virtualcurve4v" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST)); // explicit names always request the FAST intersect variant
- else if (device->hair_accel == "bvh4obb.virtualcurve4i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST));
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->hair_accel == "bvh8obb.virtualcurve8v" ) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST));
- else if (device->hair_accel == "bvh4obb.virtualcurve8i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown hair acceleration structure "+device->hair_accel); // any other name is rejected
-#endif
- }
-
- void Scene::createHairMBAccel() // Registers (via accels_add) the motion-blur curve/point acceleration structure named by device->hair_accel_mb.
- { // The body is compiled in when either EMBREE_GEOMETRY_CURVE or EMBREE_GEOMETRY_POINT is defined.
-#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
- if (device->hair_accel_mb == "default") // only isRobustAccel() is consulted here; isCompactAccel() is not
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX2()) // only enable on HSW machines, on SNB this codepath is slower
- {
- if (isRobustAccel()) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::ROBUST));
- else accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (isRobustAccel()) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::ROBUST));
- else accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST));
- }
- }
- else if (device->hair_accel_mb == "bvh4.virtualcurve4imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST)); // explicit names always request the FAST intersect variant
-
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->hair_accel_mb == "bvh4.virtualcurve8imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
- else if (device->hair_accel_mb == "bvh8.virtualcurve8imb") accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur hair acceleration structure "+device->hair_accel_mb); // any other name is rejected
-#endif
- }
-
- /* Registers the subdivision acceleration structure selected by
-    device->subdiv_accel. Compiled to an empty function when
-    EMBREE_GEOMETRY_SUBDIVISION is not defined. */
- void Scene::createSubdivAccel()
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
-   /* all three accepted names map to the same BVH4SubdivPatch1 structure */
-   const bool recognized = device->subdiv_accel == "default"
-                        || device->subdiv_accel == "bvh4.grid.eager"
-                        || device->subdiv_accel == "bvh4.subdivpatch1eager";
-   if (recognized) {
-     accels_add(device->bvh4_factory->BVH4SubdivPatch1(this));
-   }
-   else {
-     throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv accel "+device->subdiv_accel);
-   }
-#endif
- }
-
- /* Registers the motion-blur subdivision acceleration structure. Only the
-    name "default" is accepted in device->subdiv_accel_mb. Compiled to an empty
-    function when EMBREE_GEOMETRY_SUBDIVISION is not defined. */
- void Scene::createSubdivMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_SUBDIVISION)
-   const bool useDefault = (device->subdiv_accel_mb == "default");
-   if (!useDefault) {
-     throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv mblur accel "+device->subdiv_accel_mb);
-   }
-   else {
-     accels_add(device->bvh4_factory->BVH4SubdivPatch1MB(this));
-   }
-#endif
- }
-
- void Scene::createUserGeometryAccel() // Registers (via accels_add) the user-geometry acceleration structure named by device->object_accel.
- { // The whole body is compiled out when EMBREE_GEOMETRY_USER is not defined.
-#if defined(EMBREE_GEOMETRY_USER)
- if (device->object_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) // BVH8 only when AVX is usable and compact mode is off
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) { // low build quality selects the DYNAMIC build variant, everything else STATIC
- accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- else if (device->object_accel == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometry(this)); // explicit names bypass the automatic choice
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->object_accel == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometry(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry accel "+device->object_accel); // any other name is rejected
-#endif
- }
-
- void Scene::createUserGeometryMBAccel() // Registers (via accels_add) the motion-blur user-geometry acceleration structure named by device->object_accel_mb.
- { // The whole body is compiled out when EMBREE_GEOMETRY_USER is not defined.
-#if defined(EMBREE_GEOMETRY_USER)
- if (device->object_accel_mb == "default" ) {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) // BVH8 only when AVX is usable and compact mode is off
- accels_add(device->bvh8_factory->BVH8UserGeometryMB(this));
- else // this else binds to the statement after #endif when SIMD8 is compiled in
-#endif
- accels_add(device->bvh4_factory->BVH4UserGeometryMB(this));
- }
- else if (device->object_accel_mb == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometryMB(this)); // explicit names bypass the automatic choice
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->object_accel_mb == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometryMB(this));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry mblur accel "+device->object_accel_mb); // any other name is rejected
-#endif
- }
-
- void Scene::createInstanceAccel() // Registers (via accels_add) the instance acceleration structure; passes `false` as the second factory argument.
- { // The whole body is compiled out when EMBREE_GEOMETRY_INSTANCE is not defined.
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- // if (device->object_accel == "default")  (disabled: no device setting is consulted, the automatic choice always runs)
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) { // BVH8 only when AVX is usable and compact mode is off
- if (quality_flags != RTC_BUILD_QUALITY_LOW) { // low build quality selects the DYNAMIC build variant, everything else STATIC
- accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel);
-#endif
- }
-
- void Scene::createInstanceMBAccel() // Registers (via accels_add) the motion-blur instance acceleration structure; passes `false` as the second factory argument.
- { // The whole body is compiled out when EMBREE_GEOMETRY_INSTANCE is not defined.
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- //if (device->instance_accel_mb == "default")  (disabled: no device setting is consulted)
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) // BVH8 only when AVX is usable and compact mode is off
- accels_add(device->bvh8_factory->BVH8InstanceMB(this, false));
- else // this else binds to the statement after #endif when SIMD8 is compiled in
-#endif
- accels_add(device->bvh4_factory->BVH4InstanceMB(this, false));
- }
- //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb);
-#endif
- }
-
- void Scene::createInstanceExpensiveAccel() // Same selection as createInstanceAccel, but passes `true` as the second factory argument.
- { // The whole body is compiled out when EMBREE_GEOMETRY_INSTANCE is not defined.
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- // if (device->object_accel == "default")  (disabled: no device setting is consulted, the automatic choice always runs)
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) { // BVH8 only when AVX is usable and compact mode is off
- if (quality_flags != RTC_BUILD_QUALITY_LOW) { // low build quality selects the DYNAMIC build variant, everything else STATIC
- accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- if (quality_flags != RTC_BUILD_QUALITY_LOW) {
- accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::STATIC));
- } else {
- accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::DYNAMIC));
- }
- }
- }
- // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel);
-#endif
- }
-
- void Scene::createInstanceExpensiveMBAccel() // Same selection as createInstanceMBAccel, but passes `true` as the second factory argument.
- { // The whole body is compiled out when EMBREE_GEOMETRY_INSTANCE is not defined.
-#if defined(EMBREE_GEOMETRY_INSTANCE)
- //if (device->instance_accel_mb == "default")  (disabled: no device setting is consulted)
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) // BVH8 only when AVX is usable and compact mode is off
- accels_add(device->bvh8_factory->BVH8InstanceMB(this, true));
- else // this else binds to the statement after #endif when SIMD8 is compiled in
-#endif
- accels_add(device->bvh4_factory->BVH4InstanceMB(this, true));
- }
- //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb);
-#endif
- }
-
- void Scene::createGridAccel() // Registers (via accels_add) the grid acceleration structure named by device->grid_accel.
- {
- BVHFactory::IntersectVariant ivariant = isRobustAccel() ? BVHFactory::IntersectVariant::ROBUST : BVHFactory::IntersectVariant::FAST; // NOTE(review): declared outside the #if, so it is unused when EMBREE_GEOMETRY_GRID is not defined
-#if defined(EMBREE_GEOMETRY_GRID)
- if (device->grid_accel == "default")
- {
-#if defined (EMBREE_TARGET_SIMD8)
- if (device->canUseAVX() && !isCompactAccel()) // BVH8 only when AVX is usable and compact mode is off
- {
- accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
- }
- else // this else binds to the block after #endif when SIMD8 is compiled in
-#endif
- {
- accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
- }
- }
- else if (device->grid_accel == "bvh4.grid") accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant)); // explicit names still honour ivariant
-#if defined (EMBREE_TARGET_SIMD8)
- else if (device->grid_accel == "bvh8.grid") accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant));
-#endif
- else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid accel "+device->grid_accel); // any other name is rejected
-#endif
-
- }
-
- /* Registers (via accels_add) the motion-blur grid acceleration structure
-    named by device->grid_accel_mb. Accepted names are "default" and
-    "bvh4mb.grid"; any other name is reported as an invalid argument.
-    Compiled to an empty function when EMBREE_GEOMETRY_GRID is not defined. */
- void Scene::createGridMBAccel()
- {
-#if defined(EMBREE_GEOMETRY_GRID)
-   if (device->grid_accel_mb == "default")
-   {
-     accels_add(device->bvh4_factory->BVH4GridMB(this,BVHFactory::BuildVariant::STATIC));
-   }
-   else if (device->grid_accel_mb == "bvh4mb.grid") accels_add(device->bvh4_factory->BVH4GridMB(this));
-   /* report the setting that was actually tested (grid_accel_mb), not grid_accel */
-   else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid mb accel "+device->grid_accel_mb);
-#endif
-
- }
-
- /* Currently a no-op. */
- void Scene::clear()
- {
- }
-
- /* Stores `geometry` in the scene and returns the geometry ID it was stored
-    under. Passing RTC_INVALID_GEOMETRY_ID asks id_pool for a fresh ID; any
-    other value must be accepted by id_pool.add(). Runs under geometriesMutex. */
- unsigned Scene::bind(unsigned geomID, Ref<Geometry> geometry)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
-   std::scoped_lock lock(geometriesMutex);
-#else
-   Lock<SpinLock> lock(geometriesMutex);
-#endif
-
-   if (geomID != RTC_INVALID_GEOMETRY_ID)
-   {
-     /* caller-chosen ID: the pool has to accept it */
-     if (!id_pool.add(geomID)) {
-       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID provided");
-     }
-   }
-   else
-   {
-     geomID = id_pool.allocate();
-     if (geomID == RTC_INVALID_GEOMETRY_ID) {
-       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"too many geometries inside scene");
-     }
-   }
-
-   /* grow the three per-geometry arrays together */
-   if (geometries.size() <= geomID) {
-     const auto slots = geomID+1;
-     geometries.resize(slots);
-     vertices.resize(slots);
-     geometryModCounters_.resize(slots);
-   }
-
-   geometries[geomID] = geometry;
-   geometryModCounters_[geomID] = 0;
-
-   /* only an enabled geometry marks the scene as modified */
-   if (geometry->isEnabled())
-     setModified ();
-
-   return geomID;
- }
-
- /* Removes the geometry stored under `geomID`: notifies the acceleration
-    structures, returns the ID to id_pool and resets the per-geometry slots.
-    Raises RTC_ERROR_INVALID_OPERATION for an out-of-range ID or an empty
-    slot. Runs under geometriesMutex. */
- void Scene::detachGeometry(size_t geomID)
- {
-#if defined(__aarch64__) && defined(BUILD_IOS)
-   std::scoped_lock lock(geometriesMutex);
-#else
-   Lock<SpinLock> lock(geometriesMutex);
-#endif
-
-   if (!(geomID < geometries.size())) {
-     throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID");
-   }
-
-   Ref<Geometry>& slot = geometries[geomID];
-   if (slot == null) {
-     throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry");
-   }
-
-   /* only an enabled geometry marks the scene as modified */
-   if (slot->isEnabled())
-     setModified ();
-
-   accels_deleteGeometry(unsigned(geomID));
-   id_pool.deallocate((unsigned)geomID);
-
-   /* reset every per-geometry entry for this ID */
-   geometries[geomID] = null;
-   vertices[geomID] = nullptr;
-   geometryModCounters_[geomID] = 0;
- }
-
- /* Sets the is_build flag. */
- void Scene::updateInterface() {
-   is_build = true;
- }
-
- void Scene::commit_task () // Performs one scene build: gathers geometry counts, (re)selects acceleration structures when needed, builds them and runs the per-geometry commit hooks.
- {
- checkIfModifiedAndSet ();
- if (!isModified()) { // nothing changed: skip the build entirely
- return;
- }
-
- /* print scene statistics */
- if (device->verbosity(2))
- printStatistics();
-
- progress_monitor_counter = 0;
-
- /* gather scene stats and call preCommit function of each geometry */
- this->world = parallel_reduce (size_t(0), geometries.size(), GeometryCounts (),
- [this](const range<size_t>& r)->GeometryCounts
- {
- GeometryCounts c;
- for (auto i=r.begin(); i<r.end(); ++i)
- {
- if (geometries[i] && geometries[i]->isEnabled()) // empty slots and disabled geometries are skipped
- {
- geometries[i]->preCommit();
- geometries[i]->addElementsToCount (c);
- c.numFilterFunctions += (int) geometries[i]->hasFilterFunctions();
- }
- }
- return c;
- },
- std::plus<GeometryCounts>()
- );
-
- /* select acceleration structures to build */
- unsigned int new_enabled_geometry_types = world.enabledGeometryTypesMask();
- if (flags_modified || new_enabled_geometry_types != enabled_geometry_types) // re-select when flags changed or the set of enabled geometry types differs from the last commit
- {
- accels_init();
-
- /* we need to make all geometries modified, otherwise two level builder will
- not rebuild currently not modified geometries */
- parallel_for(geometryModCounters_.size(), [&] ( const size_t i ) {
- geometryModCounters_[i] = 0;
- });
-
- if (getNumPrimitives(TriangleMesh::geom_type,false)) createTriangleAccel(); // each create*Accel call runs only if getNumPrimitives reports a non-zero count for that kind
- if (getNumPrimitives(TriangleMesh::geom_type,true)) createTriangleMBAccel();
- if (getNumPrimitives(QuadMesh::geom_type,false)) createQuadAccel();
- if (getNumPrimitives(QuadMesh::geom_type,true)) createQuadMBAccel();
- if (getNumPrimitives(GridMesh::geom_type,false)) createGridAccel();
- if (getNumPrimitives(GridMesh::geom_type,true)) createGridMBAccel();
- if (getNumPrimitives(SubdivMesh::geom_type,false)) createSubdivAccel();
- if (getNumPrimitives(SubdivMesh::geom_type,true)) createSubdivMBAccel();
- if (getNumPrimitives(Geometry::MTY_CURVES,false)) createHairAccel();
- if (getNumPrimitives(Geometry::MTY_CURVES,true)) createHairMBAccel();
- if (getNumPrimitives(UserGeometry::geom_type,false)) createUserGeometryAccel();
- if (getNumPrimitives(UserGeometry::geom_type,true)) createUserGeometryMBAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,false)) createInstanceAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,true)) createInstanceMBAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,false)) createInstanceExpensiveAccel();
- if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,true)) createInstanceExpensiveMBAccel();
-
- flags_modified = false;
- enabled_geometry_types = new_enabled_geometry_types;
- }
-
- /* select fast code path if no filter function is present */
- accels_select(hasFilterFunction());
-
- /* build all hierarchies of this scene */
- accels_build();
-
- /* make static geometry immutable */
- if (!isDynamicAccel()) {
- accels_immutable();
- flags_modified = true; // in non-dynamic mode we have to re-create accels
- }
-
- /* call postCommit function of each geometry */
- parallel_for(geometries.size(), [&] ( const size_t i ) {
- if (geometries[i] && geometries[i]->isEnabled()) {
- geometries[i]->postCommit();
- vertices[i] = geometries[i]->getCompactVertexArray();
- geometryModCounters_[i] = geometries[i]->getModCounter(); // record the counter value seen by this build
- }
- });
-
- updateInterface();
-
- if (device->verbosity(2)) {
- std::cout << "created scene intersector" << std::endl;
- accels_print(2);
- std::cout << "selected scene intersector" << std::endl;
- intersectors.print(2);
- }
-
- setModified(false); // the scene is clean again until the next modification
- }
-
- /* Stores a new build quality. flags_modified is raised only when the value
-    actually changes. */
- void Scene::setBuildQuality(RTCBuildQuality quality_flags_i)
- {
-   if (quality_flags != quality_flags_i) {
-     quality_flags = quality_flags_i;
-     flags_modified = true;
-   }
- }
-
- /* Returns the stored build quality. */
- RTCBuildQuality Scene::getBuildQuality() const
- {
-   return quality_flags;
- }
-
- /* Stores new scene flags. flags_modified is raised only when the value
-    actually changes. */
- void Scene::setSceneFlags(RTCSceneFlags scene_flags_i)
- {
-   if (scene_flags != scene_flags_i) {
-     scene_flags = scene_flags_i;
-     flags_modified = true;
-   }
- }
-
- /* Returns the stored scene flags. */
- RTCSceneFlags Scene::getSceneFlags() const
- {
-   return scene_flags;
- }
-
-#if defined(TASKING_INTERNAL)
-
- void Scene::commit (bool join) // TASKING_INTERNAL variant: either starts a build on a fresh TaskScheduler or, when one is already registered, joins it (only allowed when `join` is true).
- {
- Lock<MutexSys> buildLock(buildMutex,false); // NOTE(review): the `false` argument presumably defers acquisition; the lock is taken explicitly below - confirm against Lock<>
-
- /* allocates own taskscheduler for each build */
- Ref<TaskScheduler> scheduler = nullptr;
- {
- Lock<MutexSys> lock(schedulerMutex);
- scheduler = this->scheduler;
- if (scheduler == null) { // no scheduler registered yet: this thread takes the build lock and creates one
- buildLock.lock();
- this->scheduler = scheduler = new TaskScheduler;
- }
- }
-
- /* worker threads join build */
- if (!buildLock.isLocked()) // a scheduler already existed, so this thread did not take the build lock
- {
- if (!join)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"use rtcJoinCommitScene to join a build operation");
-
- scheduler->join();
- return;
- }
-
- /* initiate build */
- // -- GODOT start --
- // try {
- scheduler->spawn_root([&]() { commit_task(); Lock<MutexSys> lock(schedulerMutex); this->scheduler = nullptr; }, 1, !join); // runs commit_task(), then unregisters the scheduler under schedulerMutex
- // }
- // catch (...) {
- // accels_clear();
- // updateInterface();
- // Lock<MutexSys> lock(schedulerMutex);
- // this->scheduler = nullptr;
- // throw;
- // }
- // -- GODOT end --
- }
-
-#endif
-
-#if defined(TASKING_TBB) || defined(TASKING_GCD)
-
- /*! Commits the scene with TBB or GCD tasking. The thread that obtains buildMutex runs commit_task();
-     any other thread waits until the build lock can be taken again, releases it and returns. */
- void Scene::commit (bool join)
- {
-#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
-   if (join)
-     throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with this TBB version");
-#endif
-
-   /* try to obtain build lock */
-   Lock<MutexSys> lock(buildMutex,buildMutex.try_lock());
-
-   /* join hierarchy build */
-   if (!lock.isLocked())
-   {
-#if !TASKING_TBB_USE_TASK_ISOLATION
-     if (!join)
-       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invoking rtcCommitScene from multiple threads is not supported with this TBB version");
-#endif
-
-     do {
-#if defined(TASKING_GCD)
-       // Do Nothing
-#else
-#if USE_TASK_ARENA
-       if (join) {
-         device->arena->execute([&]{ group.wait(); });
-       }
-       else
-#endif
-       {
-         group.wait();
-       }
-#endif
-
-       pause_cpu();
-       yield();
-     } while (!buildMutex.try_lock());
-
-     buildMutex.unlock();
-     return;
-   }
-
-   /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */
-   const unsigned int mxcsr = _mm_getcsr();
-   _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6));
-
-   try {
-#if defined(TASKING_TBB)
-#if TBB_INTERFACE_VERSION_MAJOR < 8
-     tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits);
-#else
-     tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits | tbb::task_group_context::fp_settings );
-#endif
-     //ctx.set_priority(tbb::priority_high);
-
-#if USE_TASK_ARENA
-     if (join)
-     {
-       device->arena->execute([&]{
-         group.run([&]{
-           tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx);
-         });
-         group.wait();
-       });
-     }
-     else
-#endif
-     {
-       group.run([&]{
-         tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx);
-       });
-       group.wait();
-     }
-
-#elif defined(TASKING_GCD)
-
-     commit_task();
-
-#endif // #if defined(TASKING_TBB)
-
-     /* reset MXCSR register again; this sits after the #endif so that the GCD path restores
-        the caller's value as well (it used to be restored on the TBB path only) */
-     _mm_setcsr(mxcsr);
-   }
-   catch (...)
-   {
-     /* reset MXCSR register again */
-     _mm_setcsr(mxcsr);
-
-     accels_clear();
-     updateInterface();
-     throw;
-   }
- }
-#endif
-
-#if defined(TASKING_PPL)
-
- /*! PPL variant of the scene commit: takes buildMutex and, if the scene is modified, runs commit_task() in the task group. */
- void Scene::commit (bool join)
- {
-   /* joining is rejected here; the enclosing #if already limits this function to TASKING_PPL */
-   if (join) {
-     throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with PPL");
-   }
-
-   /* take the build lock */
-   Lock<MutexSys> buildGuard(buildMutex);
-
-   /* return early when the scene is not flagged as modified */
-   checkIfModifiedAndSet ();
-   if (!isModified()) return;
-
-   /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */
-   const unsigned int savedCsr = _mm_getcsr();
-   const unsigned int fastCsr = savedCsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6);
-   _mm_setcsr(fastCsr);
-
-   try
-   {
-     auto buildOnce = [&](size_t) { commit_task(); };
-     group.run([&]{ concurrency::parallel_for(size_t(0), size_t(1), size_t(1), buildOnce); });
-     group.wait();
-
-     /* build finished: put the saved MXCSR value back */
-     _mm_setcsr(savedCsr);
-   }
-   catch (...)
-   {
-     /* build threw: put the saved MXCSR value back, clear the accels, then rethrow */
-     _mm_setcsr(savedCsr);
-
-     accels_clear();
-     updateInterface();
-     throw;
-   }
- }
-#endif
-
- /*! Stores the progress callback and the user pointer that progressMonitor() passes to it. */
- void Scene::setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr)
- {
-   progress_monitor_function = func; progress_monitor_ptr = ptr;
- }
-
- /*! Adds dn to the progress counter and reports n/numPrimitives() to the callback; a false return raises RTC_ERROR_CANCELLED. */
- void Scene::progressMonitor(double dn)
- {
-   if (!progress_monitor_function) return;
-   const size_t step = size_t(dn);
-   const size_t n = step + progress_monitor_counter.fetch_add(step);
-   const bool keepGoing = progress_monitor_function(progress_monitor_ptr, n / (double(numPrimitives())));
-   if (!keepGoing) { throw_RTCError(RTC_ERROR_CANCELLED,"progress monitor forced termination"); }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene.h b/thirdparty/embree-aarch64/kernels/common/scene.h
deleted file mode 100644
index b41c6cde91..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene.h
+++ /dev/null
@@ -1,390 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "device.h"
-#include "builder.h"
-#include "../../common/algorithms/parallel_any_of.h"
-#include "scene_triangle_mesh.h"
-#include "scene_quad_mesh.h"
-#include "scene_user_geometry.h"
-#include "scene_instance.h"
-#include "scene_curves.h"
-#include "scene_line_segments.h"
-#include "scene_subdiv_mesh.h"
-#include "scene_grid_mesh.h"
-#include "scene_points.h"
-#include "../subdiv/tessellation_cache.h"
-
-#include "acceln.h"
-#include "geometry.h"
-
-namespace embree
-{
- /*! Scene: holds the geometry list, the build flags and the commit entry points; derived from AccelN */
- class Scene : public AccelN
- {
- ALIGNED_CLASS_(std::alignment_of<Scene>::value);
- /* Iterator<Ty,mblur>: index-based filter over scene->geometries; at(i) is non-null only for geometries matching Ty::geom_type and the mblur setting */
- public:
- template<typename Ty, bool mblur = false>
- class Iterator
- {
- public:
- Iterator () {}
- /* all == true lets at() return disabled geometries too */
- Iterator (Scene* scene, bool all = false)
- : scene(scene), all(all) {}
- /* returns geometry i cast to Ty, or nullptr if the slot is empty, disabled (unless all), of another type, or fails the mblur test */
- __forceinline Ty* at(const size_t i)
- {
- Geometry* geom = scene->geometries[i].ptr;
- if (geom == nullptr) return nullptr;
- if (!all && !geom->isEnabled()) return nullptr;
- const size_t mask = geom->getTypeMask() & Ty::geom_type;
- if (!(mask)) return nullptr;
- if ((geom->numTimeSteps != 1) != mblur) return nullptr; // (numTimeSteps != 1) has to equal mblur
- return (Ty*) geom;
- }
-
- __forceinline Ty* operator[] (const size_t i) {
- return at(i);
- }
- /* number of geometry slots in the scene, not the number of matching geometries */
- __forceinline size_t size() const {
- return scene->size();
- }
-
- __forceinline size_t numPrimitives() const {
- return scene->getNumPrimitives(Ty::geom_type,mblur);
- }
- /* largest mesh->size() over all geometries accepted by at(); 0 if there are none */
- __forceinline size_t maxPrimitivesPerGeometry()
- {
- size_t ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,mesh->size());
- }
- return ret;
- }
- /* largest index accepted by at(); also 0 when no geometry matches */
- __forceinline unsigned int maxGeomID()
- {
- unsigned int ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,(unsigned int)i);
- }
- return ret;
- }
- /* largest numTimeSteps over all geometries accepted by at(); 0 if there are none */
- __forceinline unsigned maxTimeStepsPerGeometry()
- {
- unsigned ret = 0;
- for (size_t i=0; i<scene->size(); i++) {
- Ty* mesh = at(i);
- if (mesh == nullptr) continue;
- ret = max(ret,mesh->numTimeSteps);
- }
- return ret;
- }
-
- private:
- Scene* scene;
- bool all;
- };
- /* Iterator2: filters by a runtime type mask and mblur flag; at() skips disabled geometries and returns plain Geometry* */
- class Iterator2
- {
- public:
- Iterator2 () {}
-
- Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur)
- : scene(scene), typemask(typemask), mblur(mblur) {}
- /* returns geometry i, or nullptr if the slot is empty, disabled, not in typemask, or fails the mblur test */
- __forceinline Geometry* at(const size_t i)
- {
- Geometry* geom = scene->geometries[i].ptr;
- if (geom == nullptr) return nullptr;
- if (!geom->isEnabled()) return nullptr;
- if (!(geom->getTypeMask() & typemask)) return nullptr;
- if ((geom->numTimeSteps != 1) != mblur) return nullptr;
- return geom;
- }
-
- __forceinline Geometry* operator[] (const size_t i) {
- return at(i);
- }
-
- __forceinline size_t size() const {
- return scene->size();
- }
-
- private:
- Scene* scene;
- Geometry::GTypeMask typemask;
- bool mblur;
- };
-
- public:
-
- /*! Scene construction */
- Scene (Device* device);
-
- /*! Scene destruction */
- ~Scene () noexcept;
-
- private:
- /*! class is non-copyable */
- Scene (const Scene& other) DELETED; // do not implement
- Scene& operator= (const Scene& other) DELETED; // do not implement
-
- public:
- void createTriangleAccel();
- void createTriangleMBAccel();
- void createQuadAccel();
- void createQuadMBAccel();
- void createHairAccel();
- void createHairMBAccel();
- void createSubdivAccel();
- void createSubdivMBAccel();
- void createUserGeometryAccel();
- void createUserGeometryMBAccel();
- void createInstanceAccel();
- void createInstanceMBAccel();
- void createInstanceExpensiveAccel();
- void createInstanceExpensiveMBAccel();
- void createGridAccel();
- void createGridMBAccel();
-
- /*! prints statistics about the scene */
- void printStatistics();
-
- /*! clears the scene */
- void clear();
-
- /*! detaches some geometry */
- void detachGeometry(size_t geomID);
-
- void setBuildQuality(RTCBuildQuality quality_flags);
- RTCBuildQuality getBuildQuality() const;
-
- void setSceneFlags(RTCSceneFlags scene_flags);
- RTCSceneFlags getSceneFlags() const;
-
- void commit (bool join);
- void commit_task ();
- void build () {}
-
- void updateInterface();
-
- /* return number of geometries */
- __forceinline size_t size() const { return geometries.size(); }
-
- /* bind geometry to the scene */
- unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
-
- /* determines if scene is modified */
- __forceinline bool isModified() const { return modified; }
-
- /* sets modified flag */
- __forceinline void setModified(bool f = true) {
- modified = f;
- }
- /* true if the geometry's mod counter is greater than the value stored in geometryModCounters_; false for an empty slot */
- __forceinline bool isGeometryModified(size_t geomID)
- {
- Ref<Geometry>& g = geometries[geomID];
- if (!g) return false;
- return g->getModCounter() > geometryModCounters_[geomID];
- }
-
- protected:
- /* sets the modified flag if it is not set yet and any geometry reports isGeometryModified() */
- __forceinline void checkIfModifiedAndSet ()
- {
- if (isModified ()) return;
-
- auto geometryIsModified = [this](size_t geomID)->bool {
- return isGeometryModified(geomID);
- };
-
- if (parallel_any_of (size_t(0), geometries.size (), geometryIsModified)) {
- setModified ();
- }
- }
-
- public:
-
- /* get mesh by ID */
- __forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; }
- __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }
- /* typed access: asserts that the index is in range and that the geometry's type mask contains Mesh::geom_type */
- template<typename Mesh>
- __forceinline Mesh* get(size_t i) {
- assert(i < geometries.size());
- assert(geometries[i]->getTypeMask() & Mesh::geom_type);
- return (Mesh*)geometries[i].ptr;
- }
- template<typename Mesh>
- __forceinline const Mesh* get(size_t i) const {
- assert(i < geometries.size());
- assert(geometries[i]->getTypeMask() & Mesh::geom_type);
- return (Mesh*)geometries[i].ptr;
- }
- /* like get<Mesh>() but returns nullptr for an empty slot or a type mismatch instead of asserting on them */
- template<typename Mesh>
- __forceinline Mesh* getSafe(size_t i) {
- assert(i < geometries.size());
- if (geometries[i] == null) return nullptr;
- if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
- else return (Mesh*) geometries[i].ptr;
- }
- /* returns a Ref to geometry i; geometriesMutex is locked for the duration of the call */
- __forceinline Ref<Geometry> get_locked(size_t i) {
- Lock<SpinLock> lock(geometriesMutex);
- assert(i < geometries.size());
- return geometries[i];
- }
-
- /* flag decoding */
- __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
- __forceinline bool isCompactAccel() const { return scene_flags & RTC_SCENE_FLAG_COMPACT; }
- __forceinline bool isRobustAccel() const { return scene_flags & RTC_SCENE_FLAG_ROBUST; }
- __forceinline bool isStaticAccel() const { return !(scene_flags & RTC_SCENE_FLAG_DYNAMIC); }
- __forceinline bool isDynamicAccel() const { return scene_flags & RTC_SCENE_FLAG_DYNAMIC; }
-
- __forceinline bool hasContextFilterFunction() const {
- return scene_flags & RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION;
- }
-
- __forceinline bool hasGeometryFilterFunction() {
- return world.numFilterFunctions != 0;
- }
-
- __forceinline bool hasFilterFunction() {
- return hasContextFilterFunction() || hasGeometryFilterFunction();
- }
-
- /* test if the scene has already been built (returns the is_build flag) */
- __forceinline bool isBuild() const { return is_build; }
-
- public:
- IDPool<unsigned,0xFFFFFFFE> id_pool;
- vector<Ref<Geometry>> geometries; //!< list of all user geometries
- vector<unsigned int> geometryModCounters_; //!< per-geometry mod counter value compared against in isGeometryModified()
- vector<float*> vertices;
-
- public:
- Device* device;
-
- /* these are to detect if we need to recreate the acceleration structures */
- bool flags_modified;
- unsigned int enabled_geometry_types;
-
- RTCSceneFlags scene_flags;
- RTCBuildQuality quality_flags;
- MutexSys buildMutex;
- SpinLock geometriesMutex;
- bool is_build;
- private:
- bool modified; //!< true if scene got modified
-
- public:
-
- /*! global lock step task scheduler */
-#if defined(TASKING_INTERNAL)
- MutexSys schedulerMutex;
- Ref<TaskScheduler> scheduler;
-#elif defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
- tbb::isolated_task_group group;
-#elif defined(TASKING_TBB)
- tbb::task_group group;
-#elif defined(TASKING_PPL)
- concurrency::task_group group;
-#endif
-
- public:
- struct BuildProgressMonitorInterface : public BuildProgressMonitor {
- BuildProgressMonitorInterface(Scene* scene)
- : scene(scene) {}
- void operator() (size_t dn) const { scene->progressMonitor(double(dn)); } // forwards the step count to Scene::progressMonitor
- private:
- Scene* scene;
- };
- BuildProgressMonitorInterface progressInterface;
- RTCProgressMonitorFunction progress_monitor_function;
- void* progress_monitor_ptr;
- std::atomic<size_t> progress_monitor_counter;
- void progressMonitor(double nprims);
- void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr);
-
- private:
- GeometryCounts world; //!< counts for geometry
-
- public:
- /* total primitive count as reported by world.size() */
- __forceinline size_t numPrimitives() const {
- return world.size();
- }
- /* sums the world counts of every geometry type selected in mask, taking the MB counters when mblur is set */
- __forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const
- {
- size_t count = 0;
-
- if (mask & Geometry::MTY_TRIANGLE_MESH)
- count += mblur ? world.numMBTriangles : world.numTriangles;
-
- if (mask & Geometry::MTY_QUAD_MESH)
- count += mblur ? world.numMBQuads : world.numQuads;
-
- if (mask & Geometry::MTY_CURVE2)
- count += mblur ? world.numMBLineSegments : world.numLineSegments;
-
- if (mask & Geometry::MTY_CURVE4)
- count += mblur ? world.numMBBezierCurves : world.numBezierCurves;
-
- if (mask & Geometry::MTY_POINTS)
- count += mblur ? world.numMBPoints : world.numPoints;
-
- if (mask & Geometry::MTY_SUBDIV_MESH)
- count += mblur ? world.numMBSubdivPatches : world.numSubdivPatches;
-
- if (mask & Geometry::MTY_USER_GEOMETRY)
- count += mblur ? world.numMBUserGeometries : world.numUserGeometries;
-
- if (mask & Geometry::MTY_INSTANCE_CHEAP)
- count += mblur ? world.numMBInstancesCheap : world.numInstancesCheap;
-
- if (mask & Geometry::MTY_INSTANCE_EXPENSIVE)
- count += mblur ? world.numMBInstancesExpensive : world.numInstancesExpensive;
-
- if (mask & Geometry::MTY_GRID_MESH)
- count += mblur ? world.numMBGrids : world.numGrids;
-
- return count;
- }
- /* 1 when mblur is false, otherwise maxTimeStepsPerGeometry() of Iterator<Mesh,mblur> */
- template<typename Mesh, bool mblur>
- __forceinline unsigned getNumTimeSteps()
- {
- if (!mblur)
- return 1;
-
- Scene::Iterator<Mesh,mblur> iter(this);
- return iter.maxTimeStepsPerGeometry();
- }
- /* maxGeomID() of Iterator<Mesh,mblur> for this scene */
- template<typename Mesh, bool mblur>
- __forceinline unsigned int getMaxGeomID()
- {
- Scene::Iterator<Mesh,mblur> iter(this);
- return iter.maxGeomID();
- }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_curves.h b/thirdparty/embree-aarch64/kernels/common/scene_curves.h
deleted file mode 100644
index 2649ab0e3e..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_curves.h
+++ /dev/null
@@ -1,341 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! represents an array of curves; the constructor receives the concrete Geometry::GType */
- struct CurveGeometry : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4;
-
- public:
-
- /*! curve geometry construction */
- CurveGeometry (Device* device, Geometry::GType gtype);
-
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void setTessellationRate(float N);
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns the i'th curve */
- __forceinline const unsigned int& curve(size_t i) const {
- return curves[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th tangent of the first time step */
- __forceinline Vec3ff tangent(size_t i) const {
- return tangents0[i];
- }
-
- /*! returns i'th normal derivative of the first time step */
- __forceinline Vec3fa dnormal(size_t i) const {
- return dnormals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w; // the radius is read from the w component of the vertex
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th tangent of itime'th timestep */
- __forceinline Vec3ff tangent(size_t i, size_t itime) const {
- return tangents[itime][i];
- }
-
- /*! returns i'th normal derivative of itime'th timestep */
- __forceinline Vec3fa dnormal(size_t i, size_t itime) const {
- return dnormals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! gathers the four vertices i..i+3 of the first time step */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const
- {
- p0 = vertex(i+0);
- p1 = vertex(i+1);
- p2 = vertex(i+2);
- p3 = vertex(i+3);
- }
-
- /*! gathers the four vertices i..i+3 of itime'th timestep */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const
- {
- p0 = vertex(i+0,itime);
- p1 = vertex(i+1,itime);
- p2 = vertex(i+2,itime);
- p3 = vertex(i+3,itime);
- }
-
- /*! gathers the four vertices and four normals i..i+3 of the first time step */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
- {
- p0 = vertex(i+0);
- p1 = vertex(i+1);
- p2 = vertex(i+2);
- p3 = vertex(i+3);
- n0 = normal(i+0);
- n1 = normal(i+1);
- n2 = normal(i+2);
- n3 = normal(i+3);
- }
-
- /*! gathers the four vertices and four normals i..i+3 of itime'th timestep */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const
- {
- p0 = vertex(i+0,itime);
- p1 = vertex(i+1,itime);
- p2 = vertex(i+2,itime);
- p3 = vertex(i+3,itime);
- n0 = normal(i+0,itime);
- n1 = normal(i+1,itime);
- n2 = normal(i+2,itime);
- n3 = normal(i+3,itime);
- }
-
- /*! L1-prefetches first-time-step vertex data at getPtr(i)+0 and getPtr(i)+64 (there is no itime parameter) */
- __forceinline void prefetchL1_vertices(size_t i) const
- {
- prefetchL1(vertices0.getPtr(i)+0);
- prefetchL1(vertices0.getPtr(i)+64);
- }
-
- /*! L2-prefetches first-time-step vertex data at getPtr(i)+0 and getPtr(i)+64 (there is no itime parameter) */
- __forceinline void prefetchL2_vertices(size_t i) const
- {
- prefetchL2(vertices0.getPtr(i)+0);
- prefetchL2(vertices0.getPtr(i)+64);
- }
-
- /*! loads curve vertices for specified time: blends time steps itime and itime+1 with weights (1-ftime) and ftime */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
-
- const float t0 = 1.0f - ftime;
- const float t1 = ftime;
- Vec3ff a0,a1,a2,a3;
- gather(a0,a1,a2,a3,i,itime);
- Vec3ff b0,b1,b2,b3;
- gather(b0,b1,b2,b3,i,itime+1);
- p0 = madd(Vec3ff(t0),a0,t1*b0);
- p1 = madd(Vec3ff(t0),a1,t1*b1);
- p2 = madd(Vec3ff(t0),a2,t1*b2);
- p3 = madd(Vec3ff(t0),a3,t1*b3);
- }
-
- /*! loads curve vertices and normals for specified time: blends time steps itime and itime+1 with weights (1-ftime) and ftime */
- __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
-
- const float t0 = 1.0f - ftime;
- const float t1 = ftime;
- Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3;
- gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime);
- Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3;
- gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1);
- p0 = madd(Vec3ff(t0),a0,t1*b0);
- p1 = madd(Vec3ff(t0),a1,t1*b1);
- p2 = madd(Vec3ff(t0),a2,t1*b2);
- p3 = madd(Vec3ff(t0),a3,t1*b3);
- n0 = madd(Vec3ff(t0),an0,t1*bn0);
- n1 = madd(Vec3ff(t0),an1,t1*bn1);
- n2 = madd(Vec3ff(t0),an2,t1*bn2);
- n3 = madd(Vec3ff(t0),an3,t1*bn3);
- }
- /*! builds the surface for primitive primID at time step itime from its center curve (after enlargeRadiusToMinWidth) and its normal curve */
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
- {
- Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3;
- unsigned int vertexID = curve(primID);
- gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime);
- SourceCurve3ff ccurve(v0,v1,v2,v3);
- SourceCurve3fa ncurve(n0,n1,n2,n3);
- ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
- return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- }
- /*! float-time variant: clerp between the surfaces of time steps itime and itime+1 using ftime */
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
- const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
- return clerp(curve0,curve1,ftime);
- }
-
- /*! gathers the hermite curve (vertices and tangents i and i+1) of the first time step */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const
- {
- p0 = vertex (i+0);
- p1 = vertex (i+1);
- t0 = tangent(i+0);
- t1 = tangent(i+1);
- }
-
- /*! gathers the hermite curve (vertices and tangents i and i+1) of itime'th timestep */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const
- {
- p0 = vertex (i+0,itime);
- p1 = vertex (i+1,itime);
- t0 = tangent(i+0,itime);
- t1 = tangent(i+1,itime);
- }
-
- /*! loads hermite vertices and tangents for specified time: blends time steps itime and itime+1 with weights (1-ftime) and ftime */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const float f0 = 1.0f - ftime, f1 = ftime;
- Vec3ff ap0,at0,ap1,at1;
- gather_hermite(ap0,at0,ap1,at1,i,itime);
- Vec3ff bp0,bt0,bp1,bt1;
- gather_hermite(bp0,bt0,bp1,bt1,i,itime+1);
- p0 = madd(Vec3ff(f0),ap0,f1*bp0);
- t0 = madd(Vec3ff(f0),at0,f1*bt0);
- p1 = madd(Vec3ff(f0),ap1,f1*bp1);
- t1 = madd(Vec3ff(f0),at1,f1*bt1);
- }
-
- /*! gathers vertices, tangents, normals and normal derivatives i and i+1 of the first time step */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const
- {
- p0 = vertex (i+0);
- p1 = vertex (i+1);
- t0 = tangent(i+0);
- t1 = tangent(i+1);
- n0 = normal(i+0);
- n1 = normal(i+1);
- dn0 = dnormal(i+0);
- dn1 = dnormal(i+1);
- }
-
- /*! gathers vertices, tangents, normals and normal derivatives i and i+1 of itime'th timestep */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const
- {
- p0 = vertex (i+0,itime);
- p1 = vertex (i+1,itime);
- t0 = tangent(i+0,itime);
- t1 = tangent(i+1,itime);
- n0 = normal(i+0,itime);
- n1 = normal(i+1,itime);
- dn0 = dnormal(i+0,itime);
- dn1 = dnormal(i+1,itime);
- }
-
- /*! loads all hermite data for specified time by blending time steps itime and itime+1; note t0/t1 are Vec3fa& here, unlike the other overloads */
- __forceinline void gather_hermite(Vec3ff& p0, Vec3fa& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3fa& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const float f0 = 1.0f - ftime, f1 = ftime;
- Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1;
- gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime);
- Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1;
- gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1);
- p0 = madd(Vec3ff(f0),ap0,f1*bp0);
- t0 = madd(Vec3ff(f0),at0,f1*bt0);
- n0 = madd(Vec3ff(f0),an0,f1*bn0);
- dn0= madd(Vec3ff(f0),adn0,f1*bdn0);
- p1 = madd(Vec3ff(f0),ap1,f1*bp1);
- t1 = madd(Vec3ff(f0),at1,f1*bt1);
- n1 = madd(Vec3ff(f0),an1,f1*bn1);
- dn1= madd(Vec3ff(f0),adn1,f1*bdn1);
- }
- /*! hermite counterpart of getNormalOrientedCurve for time step itime: center curve from (v,t) pairs, normal curve from (n,dn) pairs */
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
- {
- Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1;
- unsigned int vertexID = curve(primID);
- gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime);
-
- SourceCurve3ff ccurve(v0,t0,v1,t1);
- SourceCurve3fa ncurve(n0,dn0,n1,dn1);
- ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
- return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- }
- /*! float-time variant: clerp between the hermite surfaces of time steps itime and itime+1 using ftime */
- template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
- __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
- {
- float ftime;
- const size_t itime = timeSegment(time, ftime);
- const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
- const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
- return clerp(curve0,curve1,ftime);
- }
-
- private:
- void resizeBuffers(unsigned int numSteps);
-
- public:
- BufferView<unsigned int> curves; //!< array of curve indices
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer
- BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<Vec3ff>> tangents; //!< tangent array for each timestep
- vector<BufferView<Vec3fa>> dnormals; //!< normal derivative array for each timestep
- BufferView<char> flags; //!< start, end flag per segment
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- int tessellationRate; //!< tessellation rate for flat curve (stored as int although setTessellationRate takes a float)
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h
deleted file mode 100644
index c08658466a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Grid mesh geometry: an array of Grid descriptors that index into per-timestep vertex buffers */
- struct GridMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
-
- /*! grid */
- struct Grid
- {
- unsigned int startVtxID; // index of the grid's first vertex (see grid_vertex_index)
- unsigned int lineVtxOffset; // index step from one grid row to the next (see grid_vertex_index)
- unsigned short resX,resY;
-
- /* border flags due to 3x3 vertex pattern: (1<<15) when x+2 reaches resX, otherwise 0 */
- __forceinline unsigned int get3x3FlagsX(const unsigned int x) const
- {
- return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0;
- }
-
- /* border flags due to 3x3 vertex pattern: (1<<15) when y+2 reaches resY, otherwise 0 */
- __forceinline unsigned int get3x3FlagsY(const unsigned int y) const
- {
- return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0;
- }
-
- /*! outputs grid structure */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
- return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }";
- }
- };
-
- public:
-
- /*! grid mesh construction */
- GridMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
- /*! returns (resX/2)*(resY/2) using integer shifts, but never less than 1 */
- __forceinline unsigned int getNumSubGrids(const size_t gridID)
- {
- const Grid &g = grid(gridID);
- return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1));
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th grid*/
- __forceinline const Grid& grid(size_t i) const {
- return grids[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
- return vertices0[i];
- }
-
- /*! returns a pointer to the i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns a pointer to the i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns the vertex buffer index of grid point (x,y): startVtxID + x + y*lineVtxOffset */
- __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
- assert(x < (size_t)g.resX);
- assert(y < (size_t)g.resY);
- return g.startVtxID + x + y * g.lineVtxOffset;
- }
-
- /*! returns the vertex at grid point (x,y) of the first timestep */
- __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
- const size_t index = grid_vertex_index(g,x,y);
- return vertex(index);
- }
-
- /*! returns the vertex at grid point (x,y) of the itime'th timestep */
- __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
- const size_t index = grid_vertex_index(g,x,y);
- return vertex(index,itime);
- }
-
- /*! calculates the bounds of the (up to) 3x3 vertices starting at (sx,sy) over all time steps; returns false, leaving bbox untouched, if a vertex is invalid */
- __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
- {
- BBox3fa b(empty);
- for (size_t t=0; t<numTimeSteps; t++)
- {
- for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
- for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
- {
- const Vec3fa v = grid_vertex(g,x,y,t);
- if (unlikely(!isvalid(v))) return false;
- b.extend(v);
- }
- }
-
- bbox = b;
- return true;
- }
-
- /*! calculates the bounds of the (up to) 3x3 vertices starting at (sx,sy) at the itime'th time step; returns false, leaving bbox untouched, if a vertex is invalid */
- __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
- {
- assert(itime < numTimeSteps);
- BBox3fa b0(empty);
- for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
- for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
- {
- const Vec3fa v = grid_vertex(g,x,y,itime);
- if (unlikely(!isvalid(v))) return false;
- b0.extend(v);
- }
-
- /* store the bounds gathered for time step itime */
- bbox = b0;
- return true;
- }
- /*! checks grid gridID at the single time step itime (default 0) */
- __forceinline bool valid(size_t gridID, size_t itime=0) const {
- return valid(gridID, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
- {
- if (unlikely(gridID >= grids.size())) return false;
- const Grid &g = grid(gridID);
- if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false; // first vertex index must be inside vertices0
- if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false; // last vertex index must be inside vertices0
-
- for (size_t y=0;y<g.resY;y++)
- for (size_t x=0;x<g.resX;x++)
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) // end() is inclusive here
- if (!isvalid(grid_vertex(g,x,y,itime))) return false;
- return true;
- }
-
- /*! bounds of the block at (sx,sy) for time step itime; stays at the 'empty' initial value when buildBounds returns false */
- __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
- {
- BBox3fa box(empty);
- buildBounds(g,sx,sy,itime,box);
- return box;
- }
- /*! linear bounds from time steps itime and itime+1; bounds0/bounds1 are not assigned by buildBounds when it returns false */
- __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
- BBox3fa bounds0, bounds1;
- buildBounds(g,sx,sy,itime+0,bounds0);
- buildBounds(g,sx,sy,itime+1,bounds1);
- return LBBox3fa(bounds0,bounds1);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments);
- }
-
- public:
- BufferView<Grid> grids; //!< array of grids
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<RawBufferView> vertexAttribs; //!< vertex attributes
- };
-
- namespace isa
- {
- /*! GridMesh subclass in namespace isa; its constructor only forwards the device to GridMesh */
- struct GridMeshISA : public GridMesh
- {
-   GridMeshISA (Device* device) : GridMesh(device) {}
- };
- }
-
- DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_instance.h b/thirdparty/embree-aarch64/kernels/common/scene_instance.h
deleted file mode 100644
index 7ff82a4fb8..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_instance.h
+++ /dev/null
@@ -1,272 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "accel.h"
-
-namespace embree
-{
- struct MotionDerivativeCoefficients;
-
- /*! Instanced acceleration structure */
- struct Instance : public Geometry
- {
- ALIGNED_STRUCT_(16);
- static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;
-
- public:
- Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1);
- ~Instance();
-
- private:
- Instance (const Instance& other) DELETED; // do not implement
- Instance& operator= (const Instance& other) DELETED; // do not implement
-
- private:
- LBBox3fa nonlinearBounds(const BBox1f& time_range_in,
- const BBox1f& geom_time_range,
- float geom_time_segments) const;
-
- BBox3fa boundSegment(size_t itime,
- BBox3fa const& obbox0, BBox3fa const& obbox1,
- BBox3fa const& bbox0, BBox3fa const& bbox1,
- float t_min, float t_max) const;
-
- /* calculates the (correct) interpolated bounds */
- __forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(slerp(local2world[itime0], local2world[itime1], f),
- lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
- return xfmBounds(lerp(local2world[itime0], local2world[itime1], f),
- lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
- }
-
- public:
- virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
- virtual void setInstancedScene(const Ref<Scene>& scene) override;
- virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override;
- virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override;
- virtual AffineSpace3fa getTransform(float time) override;
- virtual void setMask (unsigned mask) override;
- virtual void build() {}
- virtual void addElementsToCount (GeometryCounts & counts) const override;
- virtual void commit() override;
-
- public:
-
- /*! calculates the bounds of instance */
- __forceinline BBox3fa bounds(size_t i) const {
- assert(i == 0);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object->bounds.bounds());
- return xfmBounds(local2world[0],object->bounds.bounds());
- }
-
- /*! gets the bounds of the instanced scene */
- __forceinline BBox3fa getObjectBounds(size_t itime) const {
- return object->getBounds(timeStep(itime));
- }
-
- /*! calculates the bounds of instance */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const {
- assert(i == 0);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),getObjectBounds(itime));
- return xfmBounds(local2world[itime],getObjectBounds(itime));
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
- assert(i == 0);
- LBBox3fa lbbox = nonlinearBounds(dt, time_range, fnumTimeSegments);
- return lbbox;
- }
-
- /*! calculates the build bounds of the i'th item, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- assert(i==0);
- const BBox3fa b = bounds(i);
- if (bbox) *bbox = b;
- return isvalid(b);
- }
-
- /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- assert(i==0);
- const LBBox3fa bounds = linearBounds(i,itime);
- bbox = bounds.bounds ();
- return isvalid(bounds);
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return numPrimitives;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return numPrimitives != otherVersion;
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- assert(i == 0);
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- if (!isvalid(bounds(i,itime))) return false;
-
- return true;
- }
-
- __forceinline AffineSpace3fa getLocal2World() const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return quaternionDecompositionToAffineSpace(local2world[0]);
- return local2world[0];
- }
-
- __forceinline AffineSpace3fa getLocal2World(float t) const
- {
- float ftime; const unsigned int itime = timeSegment(t, ftime);
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return slerp(local2world[itime+0],local2world[itime+1],ftime);
- return lerp(local2world[itime+0],local2world[itime+1],ftime);
- }
-
- __forceinline AffineSpace3fa getWorld2Local() const {
- return world2local0;
- }
-
- __forceinline AffineSpace3fa getWorld2Local(float t) const {
- return rcp(getLocal2World(t));
- }
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const
- {
- if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
- return getWorld2LocalSlerp(valid, t);
- return getWorld2LocalLerp(valid, t);
- }
-
- private:
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const
- {
- vfloat<K> ftime;
- const vint<K> itime_k = timeSegment(t, ftime);
- assert(any(valid));
- const size_t index = bsf(movemask(valid));
- const int itime = itime_k[index];
- if (likely(all(valid, itime_k == vint<K>(itime)))) {
- return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]),
- AffineSpace3vff<K>(local2world[itime+1]),
- ftime));
- }
- else {
- AffineSpace3vff<K> space0,space1;
- vbool<K> valid1 = valid;
- while (any(valid1)) {
- vbool<K> valid2;
- const int itime = next_unique(valid1, itime_k, valid2);
- space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0);
- space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1);
- }
- return rcp(slerp(space0, space1, ftime));
- }
- }
-
- template<int K>
- __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const
- {
- vfloat<K> ftime;
- const vint<K> itime_k = timeSegment(t, ftime);
- assert(any(valid));
- const size_t index = bsf(movemask(valid));
- const int itime = itime_k[index];
- if (likely(all(valid, itime_k == vint<K>(itime)))) {
- return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]),
- AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]),
- ftime));
- } else {
- AffineSpace3vf<K> space0,space1;
- vbool<K> valid1 = valid;
- while (any(valid1)) {
- vbool<K> valid2;
- const int itime = next_unique(valid1, itime_k, valid2);
- space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0);
- space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1);
- }
- return rcp(lerp(space0, space1, ftime));
- }
- }
-
- public:
- Accel* object; //!< pointer to instanced acceleration structure
- AffineSpace3ff* local2world; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
- AffineSpace3fa world2local0; //!< transformation from world space to local space for timestep 0
- };
-
- namespace isa
- {
- struct InstanceISA : public Instance
- {
- InstanceISA (Device* device)
- : Instance(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfo pinfo(empty);
- BBox3fa b = empty;
- if (!buildBounds(0,&b)) return pinfo;
- // const BBox3fa b = bounds(0);
- // if (!isvalid(b)) return pinfo;
-
- const PrimRef prim(b,geomID,unsigned(0));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfo pinfo(empty);
- BBox3fa b = empty;
- if (!buildBounds(0,&b)) return pinfo;
- // if (!valid(0,range<size_t>(itime))) return pinfo;
- // const PrimRef prim(linearBounds(0,itime).bounds(),geomID,unsigned(0));
- const PrimRef prim(b,geomID,unsigned(0));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- assert(r.begin() == 0);
- assert(r.end() == 1);
-
- PrimInfoMB pinfo(empty);
- if (!valid(0, timeSegmentRange(t0t1))) return pinfo;
- const PrimRefMB prim(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h b/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h
deleted file mode 100644
index c0f9ee8f77..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! represents an array of line segments */
- struct LineSegments : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2;
-
- public:
-
- /*! line segments construction */
- LineSegments (Device* device, Geometry::GType gtype);
-
- public:
- void setMask (unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify ();
- void interpolate(const RTCInterpolateArguments* const args);
- void setTessellationRate(float N);
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns the i'th segment */
- __forceinline const unsigned int& segment(size_t i) const {
- return segments[i];
- }
-
- /*! returns the segment to the left of the i'th segment */
- __forceinline bool segmentLeftExists(size_t i) const {
- assert (flags);
- return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0;
- }
-
- /*! returns the segment to the right of the i'th segment */
- __forceinline bool segmentRightExists(size_t i) const {
- assert (flags);
- return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0;
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w;
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const
- {
- const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1));
- return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w)));
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0);
- const Vec3ff v1 = vertex(index+1);
- return bounds(v0,v1);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0,itime);
- const Vec3ff v1 = vertex(index+1,itime);
- return bounds(v0,v1);
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0);
- const Vec3ff v1 = vertex(index+1);
- const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
- const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
- return bounds(w0,w1);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
- {
- const unsigned int index = segment(i);
- const Vec3ff v0 = vertex(index+0,itime);
- const Vec3ff v1 = vertex(index+1,itime);
- const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
- const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
- return bounds(w0,w1);
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const unsigned int index = segment(i);
- if (index+1 >= numVertices()) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false;
- const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false;
- if (min(v0.w,v1.w) < 0.0f) return false;
- }
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
- {
- if (!valid(i,0)) return false;
- *bbox = bounds(i);
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- if (!valid(i,itime+0) || !valid(i,itime+1)) return false;
- bbox = bounds(i,itime); // use bounds of first time step in builder
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
- BufferView<unsigned int> segments; //!< array of line segment indices
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- BufferView<char> flags; //!< start, end flag per segment
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- int tessellationRate; //!< tessellation rate for bezier curve
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- namespace isa
- {
- struct LineSegmentsISA : public LineSegments
- {
- LineSegmentsISA (Device* device, Geometry::GType gtype)
- : LineSegments(device,gtype) {}
-
- Vec3fa computeDirection(unsigned int primID) const
- {
- const unsigned vtxID = segment(primID);
- const Vec3fa v0 = vertex(vtxID+0);
- const Vec3fa v1 = vertex(vtxID+1);
- return v1-v0;
- }
-
- Vec3fa computeDirection(unsigned int primID, size_t time) const
- {
- const unsigned vtxID = segment(primID);
- const Vec3fa v0 = vertex(vtxID+0,time);
- const Vec3fa v1 = vertex(vtxID+1,time);
- return v1-v0;
- }
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- BBox3fa vbounds(size_t i) const {
- return bounds(i);
- }
-
- BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const {
- return bounds(space,i);
- }
-
- LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
- return linearBounds(primID,time_range);
- }
-
- LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
- return linearBounds(space,primID,time_range);
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_points.h b/thirdparty/embree-aarch64/kernels/common/scene_points.h
deleted file mode 100644
index 1d39ed07ba..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_points.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "buffer.h"
-#include "default.h"
-#include "geometry.h"
-
-namespace embree
-{
- /*! represents an array of points */
- struct Points : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS;
-
- public:
- /*! line segments construction */
- Points(Device* device, Geometry::GType gtype);
-
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps(unsigned int numTimeSteps);
- void setVertexAttributeCount(unsigned int N);
- void setBuffer(RTCBufferType type,
- unsigned int slot,
- RTCFormat format,
- const Ref<Buffer>& buffer,
- size_t offset,
- size_t stride,
- unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void setMaxRadiusScale(float s);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
- /*! returns the number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline Vec3ff vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th normal of the first time step */
- __forceinline Vec3fa normal(size_t i) const {
- return normals0[i];
- }
-
- /*! returns i'th radius of the first time step */
- __forceinline float radius(size_t i) const {
- return vertices0[i].w;
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline Vec3ff vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! returns i'th normal of itime'th timestep */
- __forceinline Vec3fa normal(size_t i, size_t itime) const {
- return normals[itime][i];
- }
-
- /*! returns i'th radius of itime'th timestep */
- __forceinline float radius(size_t i, size_t itime) const {
- return vertices[itime][i].w;
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const Vec3ff& v0) const {
- return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w));
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Vec3ff v0 = vertex(i);
- return bounds(v0);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Vec3ff v0 = vertex(i, itime);
- return bounds(v0);
- }
-
- /*! calculates bounding box of i'th line segment */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
- {
- const Vec3ff v0 = vertex(i);
- const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
- return bounds(w0);
- }
-
- /*! calculates bounding box of i'th line segment for the itime'th time step */
- __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
- {
- const Vec3ff v0 = vertex(i, itime);
- const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
- return bounds(w0);
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const unsigned int index = (unsigned int)i;
- if (index >= numVertices())
- return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) {
- const Vec3ff v0 = vertex(index + 0, itime);
- if (unlikely(!isvalid4(v0)))
- return false;
- if (v0.w < 0.0f)
- return false;
- }
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
- {
- if (!valid(i, 0))
- return false;
- *bbox = bounds(i);
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- if (!valid(i, itime + 0) || !valid(i, itime + 1))
- return false;
- bbox = bounds(i, itime); // use bounds of first time step in builder
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(time_range))) return false;
- bbox = linearBounds(i, time_range);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- public:
- BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
- BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
- vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep
- vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< user buffers
- float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
- };
-
- namespace isa
- {
- struct PointsISA : public Points
- {
- PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {}
-
- Vec3fa computeDirection(unsigned int primID) const
- {
- return Vec3fa(1, 0, 0);
- }
-
- Vec3fa computeDirection(unsigned int primID, size_t time) const
- {
- return Vec3fa(1, 0, 0);
- }
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- BBox3fa bounds = empty;
- if (!buildBounds(j, &bounds))
- continue;
- const PrimRef prim(bounds, geomID, unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- BBox3fa bounds = empty;
- if (!buildBounds(j, itime, bounds))
- continue;
- const PrimRef prim(bounds, geomID, unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims,
- const BBox1f& t0t1,
- const range<size_t>& r,
- size_t k,
- unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j = r.begin(); j < r.end(); j++) {
- if (!valid(j, timeSegmentRange(t0t1)))
- continue;
- const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- BBox3fa vbounds(size_t i) const
- {
- return bounds(i);
- }
-
- BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const
- {
- return bounds(space, i);
- }
-
- LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
- {
- return linearBounds(primID, time_range);
- }
-
- LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const
- {
- return linearBounds(space, primID, time_range);
- }
- };
- } // namespace isa
-
- DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType);
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h
deleted file mode 100644
index d5bb054b14..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Quad Mesh */
- struct QuadMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH;
-
- /*! triangle indices */
- struct Quad
- {
- uint32_t v[4];
-
- /*! outputs triangle indices */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) {
- return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }";
- }
- };
-
- public:
-
- /*! quad mesh construction */
- QuadMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th quad */
- __forceinline const Quad& quad(size_t i) const {
- return quads[i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! calculates the bounds of the i'th quad */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0]);
- const Vec3fa v1 = vertex(q.v[1]);
- const Vec3fa v2 = vertex(q.v[2]);
- const Vec3fa v3 = vertex(q.v[3]);
- return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
- }
-
- /*! calculates the bounds of the i'th quad at the itime'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0],itime);
- const Vec3fa v1 = vertex(q.v[1],itime);
- const Vec3fa v2 = vertex(q.v[2],itime);
- const Vec3fa v3 = vertex(q.v[3],itime);
- return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const Quad& q = quad(i);
- if (unlikely(q.v[0] >= numVertices())) return false;
- if (unlikely(q.v[1] >= numVertices())) return false;
- if (unlikely(q.v[2] >= numVertices())) return false;
- if (unlikely(q.v[3] >= numVertices())) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- if (!isvalid(vertex(q.v[0],itime))) return false;
- if (!isvalid(vertex(q.v[1],itime))) return false;
- if (!isvalid(vertex(q.v[2],itime))) return false;
- if (!isvalid(vertex(q.v[3],itime))) return false;
- }
-
- return true;
- }
-
- /*! calculates the linear bounds of the i'th quad at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const Quad& q = quad(i);
- if (q.v[0] >= numVertices()) return false;
- if (q.v[1] >= numVertices()) return false;
- if (q.v[2] >= numVertices()) return false;
- if (q.v[3] >= numVertices()) return false;
-
- for (unsigned int t=0; t<numTimeSteps; t++)
- {
- const Vec3fa v0 = vertex(q.v[0],t);
- const Vec3fa v1 = vertex(q.v[1],t);
- const Vec3fa v2 = vertex(q.v[2],t);
- const Vec3fa v3 = vertex(q.v[3],t);
-
- if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)))
- return false;
- }
-
- if (bbox)
- *bbox = bounds(i);
-
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const Quad& q = quad(i);
- if (unlikely(q.v[0] >= numVertices())) return false;
- if (unlikely(q.v[1] >= numVertices())) return false;
- if (unlikely(q.v[2] >= numVertices())) return false;
- if (unlikely(q.v[3] >= numVertices())) return false;
-
- assert(itime+1 < numTimeSteps);
- const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
- const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
- const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
- const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false;
- const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
- const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
- const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
- const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false;
-
- /* use bounds of first time step in builder */
- bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3));
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const
- {
- if (!valid(i, timeSegmentRange(dt))) return false;
- bbox = linearBounds(i, dt);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return quads.modCounter;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return quads.isModified(otherVersion); // || numPrimitivesChanged;
- }
-
- /* returns the projected area */
- __forceinline float projectedPrimitiveArea(const size_t i) const {
- const Quad& q = quad(i);
- const Vec3fa v0 = vertex(q.v[0]);
- const Vec3fa v1 = vertex(q.v[1]);
- const Vec3fa v2 = vertex(q.v[2]);
- const Vec3fa v3 = vertex(q.v[3]);
- return areaProjectedTriangle(v0,v1,v3) +
- areaProjectedTriangle(v1,v2,v3);
- }
-
- public:
- BufferView<Quad> quads; //!< array of quads
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<BufferView<char>> vertexAttribs; //!< vertex attribute buffers
- };
-
- namespace isa
- {
- struct QuadMeshISA : public QuadMesh
- {
- QuadMeshISA (Device* device)
- : QuadMesh(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h
deleted file mode 100644
index d0246009db..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h
+++ /dev/null
@@ -1,326 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-#include "../subdiv/half_edge.h"
-#include "../subdiv/tessellation_cache.h"
-#include "../subdiv/catmullclark_coefficients.h"
-#include "../subdiv/patch.h"
-#include "../../common/algorithms/parallel_map.h"
-#include "../../common/algorithms/parallel_set.h"
-
-namespace embree
-{
- class SubdivMesh : public Geometry
- {
- ALIGNED_CLASS_(16);
- public:
-
- typedef HalfEdge::Edge Edge;
-
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH;
-
- /*! structure used to sort half edges using radix sort by their key */
- struct KeyHalfEdge
- {
- KeyHalfEdge() {}
-
- KeyHalfEdge (uint64_t key, HalfEdge* edge)
- : key(key), edge(edge) {}
-
- __forceinline operator uint64_t() const {
- return key;
- }
-
- friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) {
- return e0.key < e1.key;
- }
-
- public:
- uint64_t key;
- HalfEdge* edge;
- };
-
- public:
-
- /*! subdiv mesh construction */
- SubdivMesh(Device* device);
-
- public:
- void setMask (unsigned mask);
- void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode);
- void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setTopologyCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void setTessellationRate(float N);
- bool verify();
- void commit();
- void addElementsToCount (GeometryCounts & counts) const;
- void setDisplacementFunction (RTCDisplacementFunctionN func);
- unsigned int getFirstHalfEdge(unsigned int faceID);
- unsigned int getFace(unsigned int edgeID);
- unsigned int getNextHalfEdge(unsigned int edgeID);
- unsigned int getPreviousHalfEdge(unsigned int edgeID);
- unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID);
-
- public:
-
- /*! return the number of faces */
- size_t numFaces() const {
- return faceVertices.size();
- }
-
- /*! return the number of edges */
- size_t numEdges() const {
- return topology[0].vertexIndices.size();
- }
-
- /*! return the number of vertices */
- size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! calculates the bounds of the i'th subdivision patch at the j'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t j = 0) const {
- return topology[0].getHalfEdge(i)->bounds(vertices[j]);
- }
-
- /*! check if the i'th primitive is valid */
- __forceinline bool valid(size_t i) const {
- return topology[0].valid(i) && !invalidFace(i);
- }
-
- /*! check if the i'th primitive is valid for the j'th time range */
- __forceinline bool valid(size_t i, size_t j) const {
- return topology[0].valid(i) && !invalidFace(i,j);
- }
-
- /*! prints some statistics */
- void printStatistics();
-
- /*! initializes the half edge data structure */
- void initializeHalfEdgeStructures ();
-
- public:
-
- /*! returns the vertex buffer for some time step */
- __forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const {
- return vertices[t];
- }
-
- /* returns tessellation level of edge */
- __forceinline float getEdgeLevel(const size_t i) const
- {
- if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level?
- else return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level?
- }
-
- public:
- RTCDisplacementFunctionN displFunc; //!< displacement function
-
- /*! all buffers in this section are provided by the application */
- public:
-
- /*! the topology contains all data that may differ when
- * interpolating different user data buffers */
- struct Topology
- {
- public:
-
- /*! Default topology construction */
- Topology () : halfEdges(nullptr,0) {}
-
- /*! Topology initialization */
- Topology (SubdivMesh* mesh);
-
- /*! make the class movable */
- public:
- Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
- : mesh(std::move(other.mesh)),
- vertexIndices(std::move(other.vertexIndices)),
- subdiv_mode(std::move(other.subdiv_mode)),
- halfEdges(std::move(other.halfEdges)),
- halfEdges0(std::move(other.halfEdges0)),
- halfEdges1(std::move(other.halfEdges1)) {}
-
- Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
- {
- mesh = std::move(other.mesh);
- vertexIndices = std::move(other.vertexIndices);
- subdiv_mode = std::move(other.subdiv_mode);
- halfEdges = std::move(other.halfEdges);
- halfEdges0 = std::move(other.halfEdges0);
- halfEdges1 = std::move(other.halfEdges1);
- return *this;
- }
-
- public:
- /*! check if the i'th primitive is valid in this topology */
- __forceinline bool valid(size_t i) const
- {
- if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) {
- if (getHalfEdge(i)->faceHasBorder()) return false;
- }
- return true;
- }
-
- /*! updates the interpolation mode for the topology */
- void setSubdivisionMode (RTCSubdivisionMode mode);
-
- /*! marks all buffers as modified */
- void update ();
-
- /*! verifies index array */
- bool verify (size_t numVertices);
-
- /*! initializes the half edge data structure */
- void initializeHalfEdgeStructures ();
-
- private:
-
- /*! recalculates the half edges */
- void calculateHalfEdges();
-
- /*! updates half edges when recalculation is not necessary */
- void updateHalfEdges();
-
- /*! user input data */
- public:
-
- SubdivMesh* mesh;
-
- /*! indices of the vertices composing each face */
- BufferView<unsigned int> vertexIndices;
-
- /*! subdiv interpolation mode */
- RTCSubdivisionMode subdiv_mode;
-
- /*! generated data */
- public:
-
- /*! returns the start half edge for face f */
- __forceinline const HalfEdge* getHalfEdge ( const size_t f ) const {
- return &halfEdges[mesh->faceStartEdge[f]];
- }
-
- /*! Half edge structure, generated by initHalfEdgeStructures */
- mvector<HalfEdge> halfEdges;
-
- /*! the following data is only required during construction of the
- * half edge structure and can be cleared for static scenes */
- private:
-
- /*! two arrays used to sort the half edges */
- std::vector<KeyHalfEdge> halfEdges0;
- std::vector<KeyHalfEdge> halfEdges1;
- };
-
- /*! returns the start half edge for topology t and face f */
- __forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const {
- return topology[t].getHalfEdge(f);
- }
-
- /*! buffer containing the number of vertices for each face */
- BufferView<unsigned int> faceVertices;
-
- /*! array of topologies */
- vector<Topology> topology;
-
- /*! vertex buffer (one buffer for each time step) */
- vector<BufferView<Vec3fa>> vertices;
-
- /*! user data buffers */
- vector<RawBufferView> vertexAttribs;
-
- /*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */
- BufferView<Edge> edge_creases;
-
- /*! edge crease weights for each edge of the edge_creases buffer */
- BufferView<float> edge_crease_weights;
-
- /*! vertex crease buffer containing all vertices that carry vertex crease weights */
- BufferView<unsigned int> vertex_creases;
-
- /*! vertex crease weights for each vertex of the vertex_creases buffer */
- BufferView<float> vertex_crease_weights;
-
- /*! subdivision level for each half edge of the vertexIndices buffer */
- BufferView<float> levels;
- float tessellationRate; // constant rate that is used when levels is not set
-
- /*! buffer that marks specific faces as holes */
- BufferView<unsigned> holes;
-
- /*! all data in this section is generated by initializeHalfEdgeStructures function */
- private:
-
- /*! number of half edges used by faces */
- size_t numHalfEdges;
-
- /*! fast lookup table to find the first half edge for some face */
- mvector<uint32_t> faceStartEdge;
-
- /*! fast lookup table to find the face for some half edge */
- mvector<uint32_t> halfEdgeFace;
-
- /*! set with all holes */
- parallel_set<uint32_t> holeSet;
-
- /*! fast lookup table to detect invalid faces */
- mvector<int8_t> invalid_face;
-
- /*! test if face i is invalid in timestep j */
- __forceinline int8_t& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; }
- __forceinline const int8_t& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; }
-
- /*! interpolation cache */
- public:
- static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; }
- static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; }
- static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) {
- const size_t slots = numInterpolationSlots4(stride);
- assert(slot < slots);
- return slots*prim+slot;
- }
- std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags;
- std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags;
- std::vector<Patch3fa::Ref> patch_eval_trees;
-
- /*! the following data is only required during construction of the
- * half edge structure and can be cleared for static scenes */
- private:
-
- /*! map with all vertex creases */
- parallel_map<uint32_t,float> vertexCreaseMap;
-
- /*! map with all edge creases */
- parallel_map<uint64_t,float> edgeCreaseMap;
-
- protected:
-
- /*! counts number of geometry commits */
- size_t commitCounter;
- };
-
- namespace isa
- {
- struct SubdivMeshISA : public SubdivMesh
- {
- SubdivMeshISA (Device* device)
- : SubdivMesh(device) {}
-
- void interpolate(const RTCInterpolateArguments* const args);
- void interpolateN(const RTCInterpolateNArguments* const args);
- };
- }
-
- DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*);
-};
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp
deleted file mode 100644
index d1c2750f14..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "scene_triangle_mesh.h"
-#include "scene.h"
-
-namespace embree
-{
-#if defined(EMBREE_LOWEST_ISA)
-
- TriangleMesh::TriangleMesh (Device* device)
- : Geometry(device,GTY_TRIANGLE_MESH,0,1)
- {
- vertices.resize(numTimeSteps);
- }
-
- void TriangleMesh::setMask (unsigned mask)
- {
- this->mask = mask;
- Geometry::update();
- }
-
- void TriangleMesh::setNumTimeSteps (unsigned int numTimeSteps)
- {
- vertices.resize(numTimeSteps);
- Geometry::setNumTimeSteps(numTimeSteps);
- }
-
- void TriangleMesh::setVertexAttributeCount (unsigned int N)
- {
- vertexAttribs.resize(N);
- Geometry::update();
- }
-
- void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num)
- {
- /* verify that all accesses are 4 bytes aligned */
- if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3))
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned");
-
- if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (format != RTC_FORMAT_FLOAT3)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex buffer format");
-
- /* if buffer is larger than 16GB the premultiplied index optimization does not work */
- if (stride*num > 16ll*1024ll*1024ll*1024ll)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "vertex buffer can be at most 16GB large");
-
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid vertex buffer slot");
-
- vertices[slot].set(buffer, offset, stride, num, format);
- vertices[slot].checkPadding16();
- vertices0 = vertices[0];
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (format < RTC_FORMAT_FLOAT || format > RTC_FORMAT_FLOAT16)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer format");
-
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer slot");
-
- vertexAttribs[slot].set(buffer, offset, stride, num, format);
- vertexAttribs[slot].checkPadding16();
- }
- else if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- if (format != RTC_FORMAT_UINT3)
- throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid index buffer format");
-
- triangles.set(buffer, offset, stride, num, format);
- setNumPrimitives(num);
- }
- else
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- }
-
- void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot)
- {
- if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return triangles.getPtr();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return vertices[slot].getPtr();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- return vertexAttribs[slot].getPtr();
- }
- else
- {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- return nullptr;
- }
- }
-
- void TriangleMesh::updateBuffer(RTCBufferType type, unsigned int slot)
- {
- if (type == RTC_BUFFER_TYPE_INDEX)
- {
- if (slot != 0)
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- triangles.setModified();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX)
- {
- if (slot >= vertices.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- vertices[slot].setModified();
- }
- else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
- {
- if (slot >= vertexAttribs.size())
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
- vertexAttribs[slot].setModified();
- }
- else
- {
- throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
- }
-
- Geometry::update();
- }
-
- void TriangleMesh::commit()
- {
- /* verify that stride of all time steps are identical */
- for (unsigned int t=0; t<numTimeSteps; t++)
- if (vertices[t].getStride() != vertices[0].getStride())
- throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step");
-
- Geometry::commit();
- }
-
- void TriangleMesh::addElementsToCount (GeometryCounts & counts) const
- {
- if (numTimeSteps == 1) counts.numTriangles += numPrimitives;
- else counts.numMBTriangles += numPrimitives;
- }
-
- bool TriangleMesh::verify()
- {
- /*! verify size of vertex arrays */
- if (vertices.size() == 0) return false;
- for (const auto& buffer : vertices)
- if (buffer.size() != numVertices())
- return false;
-
- /*! verify size of user vertex arrays */
- for (const auto& buffer : vertexAttribs)
- if (buffer.size() != numVertices())
- return false;
-
- /*! verify triangle indices */
- for (size_t i=0; i<size(); i++) {
- if (triangles[i].v[0] >= numVertices()) return false;
- if (triangles[i].v[1] >= numVertices()) return false;
- if (triangles[i].v[2] >= numVertices()) return false;
- }
-
- /*! verify vertices */
- for (const auto& buffer : vertices)
- for (size_t i=0; i<buffer.size(); i++)
- if (!isvalid(buffer[i]))
- return false;
-
- return true;
- }
-
- void TriangleMesh::interpolate(const RTCInterpolateArguments* const args)
- {
- unsigned int primID = args->primID;
- float u = args->u;
- float v = args->v;
- RTCBufferType bufferType = args->bufferType;
- unsigned int bufferSlot = args->bufferSlot;
- float* P = args->P;
- float* dPdu = args->dPdu;
- float* dPdv = args->dPdv;
- float* ddPdudu = args->ddPdudu;
- float* ddPdvdv = args->ddPdvdv;
- float* ddPdudv = args->ddPdudv;
- unsigned int valueCount = args->valueCount;
-
- /* calculate base pointer and stride */
- assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
- (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
- const char* src = nullptr;
- size_t stride = 0;
- if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
- src = vertexAttribs[bufferSlot].getPtr();
- stride = vertexAttribs[bufferSlot].getStride();
- } else {
- src = vertices[bufferSlot].getPtr();
- stride = vertices[bufferSlot].getStride();
- }
-
- for (unsigned int i=0; i<valueCount; i+=4)
- {
- size_t ofs = i*sizeof(float);
- const float w = 1.0f-u-v;
- const Triangle& tri = triangle(primID);
- const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount));
- const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
- const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
- const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);
-
- if (P) {
- vfloat4::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2)));
- }
- if (dPdu) {
- assert(dPdu); vfloat4::storeu(valid,dPdu+i,p1-p0);
- assert(dPdv); vfloat4::storeu(valid,dPdv+i,p2-p0);
- }
- if (ddPdudu) {
- assert(ddPdudu); vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero));
- assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero));
- assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero));
- }
- }
- }
-
-#endif
-
- namespace isa
- {
- TriangleMesh* createTriangleMesh(Device* device) {
- return new TriangleMeshISA(device);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h
deleted file mode 100644
index eaf2e1799a..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "geometry.h"
-#include "buffer.h"
-
-namespace embree
-{
- /*! Triangle Mesh */
- struct TriangleMesh : public Geometry
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_TRIANGLE_MESH;
-
- /*! triangle indices */
- struct Triangle
- {
- uint32_t v[3];
-
- /*! outputs triangle indices */
- __forceinline friend embree_ostream operator<<(embree_ostream cout, const Triangle& t) {
- return cout << "Triangle { " << t.v[0] << ", " << t.v[1] << ", " << t.v[2] << " }";
- }
- };
-
- public:
-
- /*! triangle mesh construction */
- TriangleMesh (Device* device);
-
- /* geometry interface */
- public:
- void setMask(unsigned mask);
- void setNumTimeSteps (unsigned int numTimeSteps);
- void setVertexAttributeCount (unsigned int N);
- void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
- void* getBuffer(RTCBufferType type, unsigned int slot);
- void updateBuffer(RTCBufferType type, unsigned int slot);
- void commit();
- bool verify();
- void interpolate(const RTCInterpolateArguments* const args);
- void addElementsToCount (GeometryCounts & counts) const;
-
- public:
-
- /*! returns number of vertices */
- __forceinline size_t numVertices() const {
- return vertices[0].size();
- }
-
- /*! returns i'th triangle*/
- __forceinline const Triangle& triangle(size_t i) const {
- return triangles[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const Vec3fa vertex(size_t i) const {
- return vertices0[i];
- }
-
- /*! returns i'th vertex of the first time step */
- __forceinline const char* vertexPtr(size_t i) const {
- return vertices0.getPtr(i);
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
- return vertices[itime][i];
- }
-
- /*! returns i'th vertex of itime'th timestep */
- __forceinline const char* vertexPtr(size_t i, size_t itime) const {
- return vertices[itime].getPtr(i);
- }
-
- /*! calculates the bounds of the i'th triangle */
- __forceinline BBox3fa bounds(size_t i) const
- {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0]);
- const Vec3fa v1 = vertex(tri.v[1]);
- const Vec3fa v2 = vertex(tri.v[2]);
- return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
- }
-
- /*! calculates the bounds of the i'th triangle at the itime'th timestep */
- __forceinline BBox3fa bounds(size_t i, size_t itime) const
- {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0],itime);
- const Vec3fa v1 = vertex(tri.v[1],itime);
- const Vec3fa v2 = vertex(tri.v[2],itime);
- return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
- }
-
- /*! check if the i'th primitive is valid at the itime'th timestep */
- __forceinline bool valid(size_t i, size_t itime) const {
- return valid(i, make_range(itime, itime));
- }
-
- /*! check if the i'th primitive is valid between the specified time range */
- __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
- {
- if (!isvalid(vertex(tri.v[0],itime))) return false;
- if (!isvalid(vertex(tri.v[1],itime))) return false;
- if (!isvalid(vertex(tri.v[2],itime))) return false;
- }
-
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
- __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
- return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
- }
-
- /*! calculates the build bounds of the i'th primitive, if it's valid */
- __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- for (size_t t=0; t<numTimeSteps; t++)
- {
- const Vec3fa v0 = vertex(tri.v[0],t);
- const Vec3fa v1 = vertex(tri.v[1],t);
- const Vec3fa v2 = vertex(tri.v[2],t);
- if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2)))
- return false;
- }
-
- if (likely(bbox))
- *bbox = bounds(i);
-
- return true;
- }
-
- /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
- __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
- {
- const Triangle& tri = triangle(i);
- if (unlikely(tri.v[0] >= numVertices())) return false;
- if (unlikely(tri.v[1] >= numVertices())) return false;
- if (unlikely(tri.v[2] >= numVertices())) return false;
-
- assert(itime+1 < numTimeSteps);
- const Vec3fa a0 = vertex(tri.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
- const Vec3fa a1 = vertex(tri.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
- const Vec3fa a2 = vertex(tri.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
- const Vec3fa b0 = vertex(tri.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
- const Vec3fa b1 = vertex(tri.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
- const Vec3fa b2 = vertex(tri.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
-
- /* use bounds of first time step in builder */
- bbox = BBox3fa(min(a0,a1,a2),max(a0,a1,a2));
- return true;
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
- return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
- }
-
- /*! calculates the linear bounds of the i'th primitive for the specified time range */
- __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const {
- if (!valid(i, timeSegmentRange(dt))) return false;
- bbox = linearBounds(i, dt);
- return true;
- }
-
- /*! get fast access to first vertex buffer */
- __forceinline float * getCompactVertexArray () const {
- return (float*) vertices0.getPtr();
- }
-
- /* gets version info of topology */
- unsigned int getTopologyVersion() const {
- return triangles.modCounter;
- }
-
- /* returns true if topology changed */
- bool topologyChanged(unsigned int otherVersion) const {
- return triangles.isModified(otherVersion); // || numPrimitivesChanged;
- }
-
- /* returns the projected area */
- __forceinline float projectedPrimitiveArea(const size_t i) const {
- const Triangle& tri = triangle(i);
- const Vec3fa v0 = vertex(tri.v[0]);
- const Vec3fa v1 = vertex(tri.v[1]);
- const Vec3fa v2 = vertex(tri.v[2]);
- return areaProjectedTriangle(v0,v1,v2);
- }
-
- public:
- BufferView<Triangle> triangles; //!< array of triangles
- BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
- vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
- vector<RawBufferView> vertexAttribs; //!< vertex attributes
- };
-
- namespace isa
- {
- struct TriangleMeshISA : public TriangleMesh
- {
- TriangleMeshISA (Device* device)
- : TriangleMesh(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(TriangleMesh*, createTriangleMesh, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h b/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h
deleted file mode 100644
index 8d11ed6986..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "accelset.h"
-
-namespace embree
-{
- /*! User geometry with user defined intersection functions */
- struct UserGeometry : public AccelSet
- {
- /*! type of this geometry */
- static const Geometry::GTypeMask geom_type = Geometry::MTY_USER_GEOMETRY;
-
- public:
- UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1);
- virtual void setMask (unsigned mask);
- virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr);
- virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect);
- virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded);
- virtual void build() {}
- virtual void addElementsToCount (GeometryCounts & counts) const;
- };
-
- namespace isa
- {
- struct UserGeometryISA : public UserGeometry
- {
- UserGeometryISA (Device* device)
- : UserGeometry(device) {}
-
- PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!buildBounds(j,itime,bounds)) continue;
- const PrimRef prim(bounds,geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- if (!valid(j, timeSegmentRange(t0t1))) continue;
- const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
- pinfo.add_primref(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }
- };
- }
-
- DECLARE_ISA_FUNCTION(UserGeometry*, createUserGeometry, Device*);
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stack_item.h b/thirdparty/embree-aarch64/kernels/common/stack_item.h
deleted file mode 100644
index 533c385365..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stack_item.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /*! An item on the stack holds the node ID and distance of that node. */
- template<typename T>
- struct __aligned(16) StackItemT
- {
- /*! assert that the xchg function works */
- static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
-
- __forceinline StackItemT() {}
-
- __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
-
- /*! use SSE instructions to swap stack items */
- __forceinline static void xchg(StackItemT& a, StackItemT& b)
- {
- const vfloat4 sse_a = vfloat4::load((float*)&a);
- const vfloat4 sse_b = vfloat4::load((float*)&b);
- vfloat4::store(&a,sse_b);
- vfloat4::store(&b,sse_a);
- }
-
- /*! Sort 2 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 3 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s3.dist < s2.dist) xchg(s3,s2);
- if (s2.dist < s1.dist) xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
- {
- if (s2.dist < s1.dist) xchg(s2,s1);
- if (s4.dist < s3.dist) xchg(s4,s3);
- if (s3.dist < s1.dist) xchg(s3,s1);
- if (s4.dist < s2.dist) xchg(s4,s2);
- if (s3.dist < s2.dist) xchg(s3,s2);
- }
-
- /*! use SSE instructions to swap stack items */
- __forceinline static void cmp_xchg(vint4& a, vint4& b)
- {
-#if defined(__AVX512VL__)
- const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
-#else
- const vboolf4 mask0(b < a);
- const vboolf4 mask(shuffle<2,2,2,2>(mask0));
-#endif
- const vint4 c = select(mask,b,a);
- const vint4 d = select(mask,a,b);
- a = c;
- b = d;
- }
-
- /*! Sort 3 stack items. */
- __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s3,s2);
- cmp_xchg(s2,s1);
- }
-
- /*! Sort 4 stack items. */
- __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
- {
- cmp_xchg(s2,s1);
- cmp_xchg(s4,s3);
- cmp_xchg(s3,s1);
- cmp_xchg(s4,s2);
- cmp_xchg(s3,s2);
- }
-
-
- /*! Sort N stack items. */
- __forceinline friend void sort(StackItemT* begin, StackItemT* end)
- {
- for (StackItemT* i = begin+1; i != end; ++i)
- {
- const vfloat4 item = vfloat4::load((float*)i);
- const unsigned dist = i->dist;
- StackItemT* j = i;
-
- while ((j != begin) && ((j-1)->dist < dist))
- {
- vfloat4::store(j, vfloat4::load((float*)(j-1)));
- --j;
- }
-
- vfloat4::store(j, item);
- }
- }
-
- public:
- T ptr;
- unsigned dist;
- };
-
- /*! An item on the stack holds the node ID and active ray mask. */
- template<typename T>
- struct __aligned(8) StackItemMaskT
- {
- T ptr;
- size_t mask;
- };
-
- struct __aligned(8) StackItemMaskCoherent
- {
- size_t mask;
- size_t parent;
- size_t child;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stat.cpp b/thirdparty/embree-aarch64/kernels/common/stat.cpp
deleted file mode 100644
index b73c3a8c76..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stat.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "stat.h"
-
-namespace embree
-{
- Stat Stat::instance;
-
- Stat::Stat () {
- }
-
- Stat::~Stat ()
- {
-#ifdef EMBREE_STAT_COUNTERS
- Stat::print(std::cout);
-#endif
- }
-
- void Stat::print(std::ostream& cout)
- {
- Counters& cntrs = instance.cntrs;
- Counters::Data& data = instance.cntrs.code;
- //Counters::Data& data = instance.cntrs.active;
-
- /* print absolute numbers */
- cout << "--------- ABSOLUTE ---------" << std::endl;
- cout << " #normal_travs = " << float(data.normal.travs )*1E-6 << "M" << std::endl;
- cout << " #nodes = " << float(data.normal.trav_nodes )*1E-6 << "M" << std::endl;
- cout << " #nodes_xfm = " << float(data.normal.trav_xfm_nodes )*1E-6 << "M" << std::endl;
- cout << " #leaves = " << float(data.normal.trav_leaves )*1E-6 << "M" << std::endl;
- cout << " #prims = " << float(data.normal.trav_prims )*1E-6 << "M" << std::endl;
- cout << " #prim_hits = " << float(data.normal.trav_prim_hits )*1E-6 << "M" << std::endl;
-
- cout << " #stack nodes = " << float(data.normal.trav_stack_nodes )*1E-6 << "M" << std::endl;
- cout << " #stack pop = " << float(data.normal.trav_stack_pop )*1E-6 << "M" << std::endl;
-
- size_t normal_box_hits = 0;
- size_t weighted_box_hits = 0;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
- normal_box_hits += data.normal.trav_hit_boxes[i];
- weighted_box_hits += data.normal.trav_hit_boxes[i]*i;
- }
- cout << " #hit_boxes = " << normal_box_hits << " (total) distribution: ";
- float average = 0.0f;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++)
- {
- float value = 100.0f * data.normal.trav_hit_boxes[i] / normal_box_hits;
- cout << "[" << i << "] " << value << " ";
- average += (float)i*data.normal.trav_hit_boxes[i] / normal_box_hits;
- }
- cout << " average = " << average << std::endl;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.normal.trav_hit_boxes[i]*i / weighted_box_hits << " ";
- cout << std::endl;
-
- if (data.shadow.travs) {
- cout << " #shadow_travs = " << float(data.shadow.travs )*1E-6 << "M" << std::endl;
- cout << " #nodes = " << float(data.shadow.trav_nodes )*1E-6 << "M" << std::endl;
- cout << " #nodes_xfm = " << float(data.shadow.trav_xfm_nodes)*1E-6 << "M" << std::endl;
- cout << " #leaves = " << float(data.shadow.trav_leaves )*1E-6 << "M" << std::endl;
- cout << " #prims = " << float(data.shadow.trav_prims )*1E-6 << "M" << std::endl;
- cout << " #prim_hits = " << float(data.shadow.trav_prim_hits)*1E-6 << "M" << std::endl;
-
- cout << " #stack nodes = " << float(data.shadow.trav_stack_nodes )*1E-6 << "M" << std::endl;
- cout << " #stack pop = " << float(data.shadow.trav_stack_pop )*1E-6 << "M" << std::endl;
-
- size_t shadow_box_hits = 0;
- size_t weighted_shadow_box_hits = 0;
-
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
- shadow_box_hits += data.shadow.trav_hit_boxes[i];
- weighted_shadow_box_hits += data.shadow.trav_hit_boxes[i]*i;
- }
- cout << " #hit_boxes = ";
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i] / shadow_box_hits << " ";
- cout << std::endl;
- for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i]*i / weighted_shadow_box_hits << " ";
- cout << std::endl;
- }
- cout << std::endl;
-
- /* print per traversal numbers */
- cout << "--------- PER TRAVERSAL ---------" << std::endl;
- float active_normal_travs = float(cntrs.active.normal.travs )/float(cntrs.all.normal.travs );
- float active_normal_trav_nodes = float(cntrs.active.normal.trav_nodes )/float(cntrs.all.normal.trav_nodes );
- float active_normal_trav_xfm_nodes = float(cntrs.active.normal.trav_xfm_nodes )/float(cntrs.all.normal.trav_xfm_nodes );
- float active_normal_trav_leaves = float(cntrs.active.normal.trav_leaves)/float(cntrs.all.normal.trav_leaves);
- float active_normal_trav_prims = float(cntrs.active.normal.trav_prims )/float(cntrs.all.normal.trav_prims );
- float active_normal_trav_prim_hits = float(cntrs.active.normal.trav_prim_hits )/float(cntrs.all.normal.trav_prim_hits );
- float active_normal_trav_stack_pop = float(cntrs.active.normal.trav_stack_pop )/float(cntrs.all.normal.trav_stack_pop );
-
- cout << " #normal_travs = " << float(cntrs.code.normal.travs )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_travs << "% active" << std::endl;
- cout << " #nodes = " << float(cntrs.code.normal.trav_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_nodes << "% active" << std::endl;
- cout << " #node_xfm = " << float(cntrs.code.normal.trav_xfm_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_xfm_nodes << "% active" << std::endl;
- cout << " #leaves = " << float(cntrs.code.normal.trav_leaves)/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_leaves << "% active" << std::endl;
- cout << " #prims = " << float(cntrs.code.normal.trav_prims )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prims << "% active" << std::endl;
- cout << " #prim_hits = " << float(cntrs.code.normal.trav_prim_hits )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prim_hits << "% active" << std::endl;
- cout << " #stack_pop = " << float(cntrs.code.normal.trav_stack_pop )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_stack_pop << "% active" << std::endl;
-
- if (cntrs.all.shadow.travs) {
- float active_shadow_travs = float(cntrs.active.shadow.travs )/float(cntrs.all.shadow.travs );
- float active_shadow_trav_nodes = float(cntrs.active.shadow.trav_nodes )/float(cntrs.all.shadow.trav_nodes );
- float active_shadow_trav_xfm_nodes = float(cntrs.active.shadow.trav_xfm_nodes )/float(cntrs.all.shadow.trav_xfm_nodes );
- float active_shadow_trav_leaves = float(cntrs.active.shadow.trav_leaves)/float(cntrs.all.shadow.trav_leaves);
- float active_shadow_trav_prims = float(cntrs.active.shadow.trav_prims )/float(cntrs.all.shadow.trav_prims );
- float active_shadow_trav_prim_hits = float(cntrs.active.shadow.trav_prim_hits )/float(cntrs.all.shadow.trav_prim_hits );
-
- cout << " #shadow_travs = " << float(cntrs.code.shadow.travs )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_travs << "% active" << std::endl;
- cout << " #nodes = " << float(cntrs.code.shadow.trav_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_nodes << "% active" << std::endl;
- cout << " #nodes_xfm = " << float(cntrs.code.shadow.trav_xfm_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_xfm_nodes << "% active" << std::endl;
- cout << " #leaves = " << float(cntrs.code.shadow.trav_leaves)/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_leaves << "% active" << std::endl;
- cout << " #prims = " << float(cntrs.code.shadow.trav_prims )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prims << "% active" << std::endl;
- cout << " #prim_hits = " << float(cntrs.code.shadow.trav_prim_hits )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prim_hits << "% active" << std::endl;
-
- }
- cout << std::endl;
-
- /* print user counters for performance tuning */
- cout << "--------- USER ---------" << std::endl;
- for (size_t i=0; i<10; i++)
- cout << "#user" << i << " = " << float(cntrs.user[i])/float(cntrs.all.normal.travs+cntrs.all.shadow.travs) << " per traversal" << std::endl;
-
- cout << "#user5/user3 " << 100.0f*float(cntrs.user[5])/float(cntrs.user[3]) << "%" << std::endl;
- cout << "#user6/user3 " << 100.0f*float(cntrs.user[6])/float(cntrs.user[3]) << "%" << std::endl;
- cout << "#user7/user3 " << 100.0f*float(cntrs.user[7])/float(cntrs.user[3]) << "%" << std::endl;
- cout << std::endl;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/stat.h b/thirdparty/embree-aarch64/kernels/common/stat.h
deleted file mode 100644
index 3cda2bd014..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/stat.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-/* Macros to gather statistics */
-#ifdef EMBREE_STAT_COUNTERS
-# define STAT(x) x
-# define STAT3(s,x,y,z) \
- STAT(Stat::get().code .s+=x); \
- STAT(Stat::get().active.s+=y); \
- STAT(Stat::get().all .s+=z);
-# define STAT_USER(i,x) Stat::get().user[i]+=x;
-#else
-# define STAT(x)
-# define STAT3(s,x,y,z)
-# define STAT_USER(i,x)
-#endif
-
-namespace embree
-{
- /*! Gathers ray tracing statistics. We count 1) how often a code
- * location is reached, 2) how many SIMD lanes are active, 3) how
- * many SIMD lanes reach the code location */
- class Stat
- {
- public:
-
- static const size_t SIZE_HISTOGRAM = 64+1;
-
- /*! constructs stat counter class */
- Stat ();
-
- /*! destructs stat counter class */
- ~Stat ();
-
- class Counters
- {
- public:
- Counters () {
- clear();
- }
-
- void clear()
- {
- all.clear();
- active.clear();
- code.clear();
- for (auto& u : user) u.store(0);
- }
-
- public:
-
- /* per packet and per ray stastics */
- struct Data
- {
- void clear () {
- normal.clear();
- shadow.clear();
- point_query.clear();
- }
-
- /* normal and shadow ray statistics */
- struct
- {
- void clear()
- {
- travs.store(0);
- trav_nodes.store(0);
- trav_leaves.store(0);
- trav_prims.store(0);
- trav_prim_hits.store(0);
- for (auto& v : trav_hit_boxes) v.store(0);
- trav_stack_pop.store(0);
- trav_stack_nodes.store(0);
- trav_xfm_nodes.store(0);
- }
-
- public:
- std::atomic<size_t> travs;
- std::atomic<size_t> trav_nodes;
- std::atomic<size_t> trav_leaves;
- std::atomic<size_t> trav_prims;
- std::atomic<size_t> trav_prim_hits;
- std::atomic<size_t> trav_hit_boxes[SIZE_HISTOGRAM+1];
- std::atomic<size_t> trav_stack_pop;
- std::atomic<size_t> trav_stack_nodes;
- std::atomic<size_t> trav_xfm_nodes;
-
- } normal, shadow, point_query;
- } all, active, code;
-
- std::atomic<size_t> user[10];
- };
-
- public:
-
- static __forceinline Counters& get() {
- return instance.cntrs;
- }
-
- static void clear() {
- instance.cntrs.clear();
- }
-
- static void print(embree_ostream cout);
-
- private:
- Counters cntrs;
-
- private:
- static Stat instance;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/state.cpp b/thirdparty/embree-aarch64/kernels/common/state.cpp
deleted file mode 100644
index 51fc9b7826..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/state.cpp
+++ /dev/null
@@ -1,543 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "state.h"
-#include "../../common/lexers/streamfilters.h"
-
-namespace embree
-{
- MutexSys g_printMutex;
-
- State::ErrorHandler State::g_errorHandler;
-
- State::ErrorHandler::ErrorHandler()
- : thread_error(createTls()) {}
-
- State::ErrorHandler::~ErrorHandler()
- {
- Lock<MutexSys> lock(errors_mutex);
- for (size_t i=0; i<thread_errors.size(); i++)
- delete thread_errors[i];
- destroyTls(thread_error);
- thread_errors.clear();
- }
-
- RTCError* State::ErrorHandler::error()
- {
- RTCError* stored_error = (RTCError*) getTls(thread_error);
- if (stored_error) return stored_error;
-
- Lock<MutexSys> lock(errors_mutex);
- stored_error = new RTCError(RTC_ERROR_NONE);
- thread_errors.push_back(stored_error);
- setTls(thread_error,stored_error);
- return stored_error;
- }
-
- State::State ()
- : enabled_cpu_features(getCPUFeatures()),
- enabled_builder_cpu_features(enabled_cpu_features),
- frequency_level(FREQUENCY_SIMD256)
- {
- tri_accel = "default";
- tri_builder = "default";
- tri_traverser = "default";
-
- tri_accel_mb = "default";
- tri_builder_mb = "default";
- tri_traverser_mb = "default";
-
- quad_accel = "default";
- quad_builder = "default";
- quad_traverser = "default";
-
- quad_accel_mb = "default";
- quad_builder_mb = "default";
- quad_traverser_mb = "default";
-
- line_accel = "default";
- line_builder = "default";
- line_traverser = "default";
-
- line_accel_mb = "default";
- line_builder_mb = "default";
- line_traverser_mb = "default";
-
- hair_accel = "default";
- hair_builder = "default";
- hair_traverser = "default";
-
- hair_accel_mb = "default";
- hair_builder_mb = "default";
- hair_traverser_mb = "default";
-
- object_accel = "default";
- object_builder = "default";
- object_accel_min_leaf_size = 1;
- object_accel_max_leaf_size = 1;
-
- object_accel_mb = "default";
- object_builder_mb = "default";
- object_accel_mb_min_leaf_size = 1;
- object_accel_mb_max_leaf_size = 1;
-
- max_spatial_split_replications = 1.2f;
- useSpatialPreSplits = false;
-
- tessellation_cache_size = 128*1024*1024;
-
- subdiv_accel = "default";
- subdiv_accel_mb = "default";
-
- grid_accel = "default";
- grid_builder = "default";
- grid_accel_mb = "default";
- grid_builder_mb = "default";
-
- instancing_open_min = 0;
- instancing_block_size = 0;
- instancing_open_factor = 8.0f;
- instancing_open_max_depth = 32;
- instancing_open_max = 50000000;
-
- ignore_config_files = false;
- float_exceptions = false;
- quality_flags = -1;
- scene_flags = -1;
- verbose = 0;
- benchmark = 0;
-
- numThreads = 0;
- numUserThreads = 0;
-
-#if TASKING_INTERNAL
- set_affinity = true;
-#else
- set_affinity = false;
-#endif
- /* per default enable affinity on KNL */
- if (hasISA(AVX512KNL)) set_affinity = true;
-
- start_threads = false;
- enable_selockmemoryprivilege = false;
-#if defined(__LINUX__)
- hugepages = true;
-#else
- hugepages = false;
-#endif
- hugepages_success = true;
-
- alloc_main_block_size = 0;
- alloc_num_main_slots = 0;
- alloc_thread_block_size = 0;
- alloc_single_thread_alloc = -1;
-
- error_function = nullptr;
- error_function_userptr = nullptr;
-
- memory_monitor_function = nullptr;
- memory_monitor_userptr = nullptr;
- }
-
- State::~State() {
- }
-
- bool State::hasISA(const int isa) {
- return (enabled_cpu_features & isa) == isa;
- }
-
- bool State::checkISASupport() {
-#if defined(__ARM_NEON)
- /*
- * NEON CPU type is a mixture of NEON and SSE2
- */
-
- bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2;
-
- /* this will be true when explicitly initialize Device with `isa=neon` config */
- bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON;
-
- return hasSSE2 || hasNEON;
-#else
- return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features;
-#endif
- }
-
- void State::verify()
- {
- /* verify that calculations stay in range */
- assert(rcp(min_rcp_input)*FLT_LARGE+FLT_LARGE < 0.01f*FLT_MAX);
-
- /* here we verify that CPP files compiled for a specific ISA only
- * call that same or lower ISA version of non-inlined class member
- * functions */
-#if defined(DEBUG)
-#if defined(EMBREE_TARGET_SSE2)
-#if !defined(__ARM_NEON)
- assert(sse2::getISA() <= SSE2);
-#endif
-#endif
-#if defined(EMBREE_TARGET_SSE42)
- assert(sse42::getISA() <= SSE42);
-#endif
-#if defined(EMBREE_TARGET_AVX)
- assert(avx::getISA() <= AVX);
-#endif
-#if defined(EMBREE_TARGET_AVX2)
- assert(avx2::getISA() <= AVX2);
-#endif
-#if defined (EMBREE_TARGET_AVX512KNL)
- assert(avx512knl::getISA() <= AVX512KNL);
-#endif
-#if defined (EMBREE_TARGET_AVX512SKX)
- assert(avx512skx::getISA() <= AVX512SKX);
-#endif
-#endif
- }
-
- const char* symbols[3] = { "=", ",", "|" };
-
- bool State::parseFile(const FileName& fileName)
- {
- FILE* f = fopen(fileName.c_str(),"r");
- if (!f) return false;
- Ref<Stream<int> > file = new FileStream(f,fileName);
-
- std::vector<std::string> syms;
- for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
- syms.push_back(symbols[i]);
-
- Ref<TokenStream> cin = new TokenStream(new LineCommentFilter(file,"#"),
- TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
- TokenStream::separators,syms);
- parse(cin);
- return true;
- }
-
- void State::parseString(const char* cfg)
- {
- if (cfg == nullptr) return;
-
- std::vector<std::string> syms;
- for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
- syms.push_back(symbols[i]);
-
- Ref<TokenStream> cin = new TokenStream(new StrStream(cfg),
- TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
- TokenStream::separators,syms);
- parse(cin);
- }
-
- int string_to_cpufeatures(const std::string& isa)
- {
- if (isa == "sse" ) return SSE;
- else if (isa == "sse2") return SSE2;
- else if (isa == "sse3") return SSE3;
- else if (isa == "ssse3") return SSSE3;
- else if (isa == "sse41") return SSE41;
- else if (isa == "sse4.1") return SSE41;
- else if (isa == "sse42") return SSE42;
- else if (isa == "sse4.2") return SSE42;
- else if (isa == "avx") return AVX;
- else if (isa == "avxi") return AVXI;
- else if (isa == "avx2") return AVX2;
- else if (isa == "avx512knl") return AVX512KNL;
- else if (isa == "avx512skx") return AVX512SKX;
- else return SSE2;
- }
-
- void State::parse(Ref<TokenStream> cin)
- {
- /* parse until end of stream */
- while (cin->peek() != Token::Eof())
- {
- const Token tok = cin->get();
-
- if (tok == Token::Id("threads") && cin->trySymbol("="))
- numThreads = cin->get().Int();
-
- else if (tok == Token::Id("user_threads")&& cin->trySymbol("="))
- numUserThreads = cin->get().Int();
-
- else if (tok == Token::Id("set_affinity")&& cin->trySymbol("="))
- set_affinity = cin->get().Int();
-
- else if (tok == Token::Id("affinity")&& cin->trySymbol("="))
- set_affinity = cin->get().Int();
-
- else if (tok == Token::Id("start_threads")&& cin->trySymbol("="))
- start_threads = cin->get().Int();
-
- else if (tok == Token::Id("isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_cpu_features = string_to_cpufeatures(isa);
- enabled_builder_cpu_features = enabled_cpu_features;
- }
-
- else if (tok == Token::Id("max_isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_cpu_features &= string_to_cpufeatures(isa);
- enabled_builder_cpu_features &= enabled_cpu_features;
- }
-
- else if (tok == Token::Id("max_builder_isa") && cin->trySymbol("=")) {
- std::string isa = toLowerCase(cin->get().Identifier());
- enabled_builder_cpu_features &= string_to_cpufeatures(isa);
- }
-
- else if (tok == Token::Id("frequency_level") && cin->trySymbol("=")) {
- std::string freq = cin->get().Identifier();
- if (freq == "simd128") frequency_level = FREQUENCY_SIMD128;
- else if (freq == "simd256") frequency_level = FREQUENCY_SIMD256;
- else if (freq == "simd512") frequency_level = FREQUENCY_SIMD512;
- }
-
- else if (tok == Token::Id("enable_selockmemoryprivilege") && cin->trySymbol("=")) {
- enable_selockmemoryprivilege = cin->get().Int();
- }
- else if (tok == Token::Id("hugepages") && cin->trySymbol("=")) {
- hugepages = cin->get().Int();
- }
-
- else if (tok == Token::Id("ignore_config_files") && cin->trySymbol("="))
- ignore_config_files = cin->get().Int();
- else if (tok == Token::Id("float_exceptions") && cin->trySymbol("="))
- float_exceptions = cin->get().Int();
-
- else if ((tok == Token::Id("tri_accel") || tok == Token::Id("accel")) && cin->trySymbol("="))
- tri_accel = cin->get().Identifier();
- else if ((tok == Token::Id("tri_builder") || tok == Token::Id("builder")) && cin->trySymbol("="))
- tri_builder = cin->get().Identifier();
- else if ((tok == Token::Id("tri_traverser") || tok == Token::Id("traverser")) && cin->trySymbol("="))
- tri_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("tri_accel_mb") || tok == Token::Id("accel_mb")) && cin->trySymbol("="))
- tri_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("tri_builder_mb") || tok == Token::Id("builder_mb")) && cin->trySymbol("="))
- tri_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("tri_traverser_mb") || tok == Token::Id("traverser_mb")) && cin->trySymbol("="))
- tri_traverser_mb = cin->get().Identifier();
-
- else if ((tok == Token::Id("quad_accel")) && cin->trySymbol("="))
- quad_accel = cin->get().Identifier();
- else if ((tok == Token::Id("quad_builder")) && cin->trySymbol("="))
- quad_builder = cin->get().Identifier();
- else if ((tok == Token::Id("quad_traverser")) && cin->trySymbol("="))
- quad_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("quad_accel_mb")) && cin->trySymbol("="))
- quad_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("quad_builder_mb")) && cin->trySymbol("="))
- quad_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("quad_traverser_mb")) && cin->trySymbol("="))
- quad_traverser_mb = cin->get().Identifier();
-
- else if ((tok == Token::Id("line_accel")) && cin->trySymbol("="))
- line_accel = cin->get().Identifier();
- else if ((tok == Token::Id("line_builder")) && cin->trySymbol("="))
- line_builder = cin->get().Identifier();
- else if ((tok == Token::Id("line_traverser")) && cin->trySymbol("="))
- line_traverser = cin->get().Identifier();
-
- else if ((tok == Token::Id("line_accel_mb")) && cin->trySymbol("="))
- line_accel_mb = cin->get().Identifier();
- else if ((tok == Token::Id("line_builder_mb")) && cin->trySymbol("="))
- line_builder_mb = cin->get().Identifier();
- else if ((tok == Token::Id("line_traverser_mb")) && cin->trySymbol("="))
- line_traverser_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("hair_accel") && cin->trySymbol("="))
- hair_accel = cin->get().Identifier();
- else if (tok == Token::Id("hair_builder") && cin->trySymbol("="))
- hair_builder = cin->get().Identifier();
- else if (tok == Token::Id("hair_traverser") && cin->trySymbol("="))
- hair_traverser = cin->get().Identifier();
-
- else if (tok == Token::Id("hair_accel_mb") && cin->trySymbol("="))
- hair_accel_mb = cin->get().Identifier();
- else if (tok == Token::Id("hair_builder_mb") && cin->trySymbol("="))
- hair_builder_mb = cin->get().Identifier();
- else if (tok == Token::Id("hair_traverser_mb") && cin->trySymbol("="))
- hair_traverser_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("object_accel") && cin->trySymbol("="))
- object_accel = cin->get().Identifier();
- else if (tok == Token::Id("object_builder") && cin->trySymbol("="))
- object_builder = cin->get().Identifier();
- else if (tok == Token::Id("object_accel_min_leaf_size") && cin->trySymbol("="))
- object_accel_min_leaf_size = cin->get().Int();
- else if (tok == Token::Id("object_accel_max_leaf_size") && cin->trySymbol("="))
- object_accel_max_leaf_size = cin->get().Int();
-
- else if (tok == Token::Id("object_accel_mb") && cin->trySymbol("="))
- object_accel_mb = cin->get().Identifier();
- else if (tok == Token::Id("object_builder_mb") && cin->trySymbol("="))
- object_builder_mb = cin->get().Identifier();
- else if (tok == Token::Id("object_accel_mb_min_leaf_size") && cin->trySymbol("="))
- object_accel_mb_min_leaf_size = cin->get().Int();
- else if (tok == Token::Id("object_accel_mb_max_leaf_size") && cin->trySymbol("="))
- object_accel_mb_max_leaf_size = cin->get().Int();
-
- else if (tok == Token::Id("instancing_open_min") && cin->trySymbol("="))
- instancing_open_min = cin->get().Int();
- else if (tok == Token::Id("instancing_block_size") && cin->trySymbol("=")) {
- instancing_block_size = cin->get().Int();
- instancing_open_factor = 0.0f;
- }
- else if (tok == Token::Id("instancing_open_max_depth") && cin->trySymbol("="))
- instancing_open_max_depth = cin->get().Int();
- else if (tok == Token::Id("instancing_open_factor") && cin->trySymbol("=")) {
- instancing_block_size = 0;
- instancing_open_factor = cin->get().Float();
- }
- else if (tok == Token::Id("instancing_open_max") && cin->trySymbol("="))
- instancing_open_max = cin->get().Int();
-
- else if (tok == Token::Id("subdiv_accel") && cin->trySymbol("="))
- subdiv_accel = cin->get().Identifier();
- else if (tok == Token::Id("subdiv_accel_mb") && cin->trySymbol("="))
- subdiv_accel_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("grid_accel") && cin->trySymbol("="))
- grid_accel = cin->get().Identifier();
- else if (tok == Token::Id("grid_accel_mb") && cin->trySymbol("="))
- grid_accel_mb = cin->get().Identifier();
-
- else if (tok == Token::Id("verbose") && cin->trySymbol("="))
- verbose = cin->get().Int();
- else if (tok == Token::Id("benchmark") && cin->trySymbol("="))
- benchmark = cin->get().Int();
-
- else if (tok == Token::Id("quality")) {
- if (cin->trySymbol("=")) {
- Token flag = cin->get();
- if (flag == Token::Id("low")) quality_flags = RTC_BUILD_QUALITY_LOW;
- else if (flag == Token::Id("medium")) quality_flags = RTC_BUILD_QUALITY_MEDIUM;
- else if (flag == Token::Id("high")) quality_flags = RTC_BUILD_QUALITY_HIGH;
- }
- }
-
- else if (tok == Token::Id("scene_flags")) {
- scene_flags = 0;
- if (cin->trySymbol("=")) {
- do {
- Token flag = cin->get();
- if (flag == Token::Id("dynamic") ) scene_flags |= RTC_SCENE_FLAG_DYNAMIC;
- else if (flag == Token::Id("compact")) scene_flags |= RTC_SCENE_FLAG_COMPACT;
- else if (flag == Token::Id("robust")) scene_flags |= RTC_SCENE_FLAG_ROBUST;
- } while (cin->trySymbol("|"));
- }
- }
-
- else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("="))
- max_spatial_split_replications = cin->get().Float();
-
- else if (tok == Token::Id("presplits") && cin->trySymbol("="))
- useSpatialPreSplits = cin->get().Int() != 0 ? true : false;
-
- else if (tok == Token::Id("tessellation_cache_size") && cin->trySymbol("="))
- tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
- else if (tok == Token::Id("cache_size") && cin->trySymbol("="))
- tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
-
- else if (tok == Token::Id("alloc_main_block_size") && cin->trySymbol("="))
- alloc_main_block_size = cin->get().Int();
- else if (tok == Token::Id("alloc_num_main_slots") && cin->trySymbol("="))
- alloc_num_main_slots = cin->get().Int();
- else if (tok == Token::Id("alloc_thread_block_size") && cin->trySymbol("="))
- alloc_thread_block_size = cin->get().Int();
- else if (tok == Token::Id("alloc_single_thread_alloc") && cin->trySymbol("="))
- alloc_single_thread_alloc = cin->get().Int();
-
- cin->trySymbol(","); // optional , separator
- }
- }
-
- bool State::verbosity(size_t N) {
- return N <= verbose; // true when the configured verbose level is at least N
- }
-
- void State::print() // writes the current configuration to std::cout, one titled section per group of settings
- {
- std::cout << "general:" << std::endl;
- std::cout << " build threads = " << numThreads << std::endl;
- std::cout << " build user threads = " << numUserThreads << std::endl;
- std::cout << " start_threads = " << start_threads << std::endl;
- std::cout << " affinity = " << set_affinity << std::endl;
- std::cout << " frequency_level = ";
- switch (frequency_level) { // print the enum by name; any value outside the three known levels is reported as "error"
- case FREQUENCY_SIMD128: std::cout << "simd128" << std::endl; break;
- case FREQUENCY_SIMD256: std::cout << "simd256" << std::endl; break;
- case FREQUENCY_SIMD512: std::cout << "simd512" << std::endl; break;
- default: std::cout << "error" << std::endl; break;
- }
-
- std::cout << " hugepages = "; // three states: not requested, requested and working, requested but failed
- if (!hugepages) std::cout << "disabled" << std::endl;
- else if (hugepages_success) std::cout << "enabled" << std::endl;
- else std::cout << "failed" << std::endl;
-
- std::cout << " verbosity = " << verbose << std::endl;
- std::cout << " cache_size = " << float(tessellation_cache_size)*1E-6 << " MB" << std::endl; // NOTE(review): scaled by 1E-6 here, while parse() multiplies the option by 1024*1024, so the printed number differs slightly from the configured one
- std::cout << " max_spatial_split_replications = " << max_spatial_split_replications << std::endl;
-
- std::cout << "triangles:" << std::endl;
- std::cout << " accel = " << tri_accel << std::endl;
- std::cout << " builder = " << tri_builder << std::endl;
- std::cout << " traverser = " << tri_traverser << std::endl;
-
- std::cout << "motion blur triangles:" << std::endl;
- std::cout << " accel = " << tri_accel_mb << std::endl;
- std::cout << " builder = " << tri_builder_mb << std::endl;
- std::cout << " traverser = " << tri_traverser_mb << std::endl;
-
- std::cout << "quads:" << std::endl;
- std::cout << " accel = " << quad_accel << std::endl;
- std::cout << " builder = " << quad_builder << std::endl;
- std::cout << " traverser = " << quad_traverser << std::endl;
-
- std::cout << "motion blur quads:" << std::endl;
- std::cout << " accel = " << quad_accel_mb << std::endl;
- std::cout << " builder = " << quad_builder_mb << std::endl;
- std::cout << " traverser = " << quad_traverser_mb << std::endl;
-
- std::cout << "line segments:" << std::endl;
- std::cout << " accel = " << line_accel << std::endl;
- std::cout << " builder = " << line_builder << std::endl;
- std::cout << " traverser = " << line_traverser << std::endl;
-
- std::cout << "motion blur line segments:" << std::endl;
- std::cout << " accel = " << line_accel_mb << std::endl;
- std::cout << " builder = " << line_builder_mb << std::endl;
- std::cout << " traverser = " << line_traverser_mb << std::endl;
-
- std::cout << "hair:" << std::endl;
- std::cout << " accel = " << hair_accel << std::endl;
- std::cout << " builder = " << hair_builder << std::endl;
- std::cout << " traverser = " << hair_traverser << std::endl;
-
- std::cout << "motion blur hair:" << std::endl;
- std::cout << " accel = " << hair_accel_mb << std::endl;
- std::cout << " builder = " << hair_builder_mb << std::endl;
- std::cout << " traverser = " << hair_traverser_mb << std::endl;
-
- std::cout << "subdivision surfaces:" << std::endl; // NOTE(review): only subdiv_accel is printed; subdiv_accel_mb has no output line
- std::cout << " accel = " << subdiv_accel << std::endl;
-
- std::cout << "grids:" << std::endl;
- std::cout << " accel = " << grid_accel << std::endl;
- std::cout << " builder = " << grid_builder << std::endl;
-
- std::cout << "motion blur grids:" << std::endl;
- std::cout << " accel = " << grid_accel_mb << std::endl;
- std::cout << " builder = " << grid_builder_mb << std::endl;
-
- std::cout << "object_accel:" << std::endl; // only the leaf-size limits are printed; the object_accel / object_builder names are not
- std::cout << " min_leaf_size = " << object_accel_min_leaf_size << std::endl;
- std::cout << " max_leaf_size = " << object_accel_max_leaf_size << std::endl;
-
- std::cout << "object_accel_mb:" << std::endl; // likewise only the leaf-size limits of the motion blur variant
- std::cout << " min_leaf_size = " << object_accel_mb_min_leaf_size << std::endl;
- std::cout << " max_leaf_size = " << object_accel_mb_max_leaf_size << std::endl;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/state.h b/thirdparty/embree-aarch64/kernels/common/state.h
deleted file mode 100644
index d0fccc023f..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/state.h
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "default.h"
-
-namespace embree
-{
- /* mutex to make printing to cout thread safe */
- extern MutexSys g_printMutex;
-
- struct State : public RefCount
- {
- public:
- /*! state construction */
- State ();
-
- /*! state destruction */
- ~State();
-
- /*! verifies that state is correct */
- void verify();
-
- /*! parses state from a configuration file */
- bool parseFile(const FileName& fileName);
-
- /*! parses the state from a string */
- void parseString(const char* cfg);
-
- /*! parses "name = value" settings, optionally separated by ',', from a token stream */
- void parse(Ref<TokenStream> cin);
-
- /*! prints the state to std::cout */
- void print();
-
- /*! checks if verbosity level is at least N */
- bool verbosity(size_t N);
-
- /*! checks if some particular ISA is enabled */
- bool hasISA(const int isa);
-
- /*! check whether selected ISA is supported by the HW */
- bool checkISASupport();
-
- public:
- std::string tri_accel; //!< acceleration structure to use for triangles
- std::string tri_builder; //!< builder to use for triangles
- std::string tri_traverser; //!< traverser to use for triangles
-
- public:
- std::string tri_accel_mb; //!< acceleration structure to use for motion blur triangles
- std::string tri_builder_mb; //!< builder to use for motion blur triangles
- std::string tri_traverser_mb; //!< traverser to use for motion blur triangles
-
- public:
- std::string quad_accel; //!< acceleration structure to use for quads
- std::string quad_builder; //!< builder to use for quads
- std::string quad_traverser; //!< traverser to use for quads
-
- public:
- std::string quad_accel_mb; //!< acceleration structure to use for motion blur quads
- std::string quad_builder_mb; //!< builder to use for motion blur quads
- std::string quad_traverser_mb; //!< traverser to use for motion blur quads
-
- public:
- std::string line_accel; //!< acceleration structure to use for line segments
- std::string line_builder; //!< builder to use for line segments
- std::string line_traverser; //!< traverser to use for line segments
-
- public:
- std::string line_accel_mb; //!< acceleration structure to use for motion blur line segments
- std::string line_builder_mb; //!< builder to use for motion blur line segments
- std::string line_traverser_mb; //!< traverser to use for motion blur line segments
-
- public:
- std::string hair_accel; //!< hair acceleration structure to use
- std::string hair_builder; //!< builder to use for hair
- std::string hair_traverser; //!< traverser to use for hair
-
- public:
- std::string hair_accel_mb; //!< acceleration structure to use for motion blur hair
- std::string hair_builder_mb; //!< builder to use for motion blur hair
- std::string hair_traverser_mb; //!< traverser to use for motion blur hair
-
- public:
- std::string object_accel; //!< acceleration structure for user geometries
- std::string object_builder; //!< builder for user geometries
- int object_accel_min_leaf_size; //!< minimum leaf size for object acceleration structure
- int object_accel_max_leaf_size; //!< maximum leaf size for object acceleration structure
-
- public:
- std::string object_accel_mb; //!< acceleration structure for motion blur user geometries
- std::string object_builder_mb; //!< builder for motion blur user geometries
- int object_accel_mb_min_leaf_size; //!< minimum leaf size for mblur object acceleration structure
- int object_accel_mb_max_leaf_size; //!< maximum leaf size for mblur object acceleration structure
-
- public:
- std::string subdiv_accel; //!< acceleration structure to use for subdivision surfaces
- std::string subdiv_accel_mb; //!< acceleration structure to use for motion blur subdivision surfaces
-
- public:
- std::string grid_accel; //!< acceleration structure to use for grids
- std::string grid_builder; //!< builder for grids
- std::string grid_accel_mb; //!< acceleration structure to use for motion blur grids
- std::string grid_builder_mb; //!< builder for motion blur grids
-
- public:
- float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits
- bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder
- size_t tessellation_cache_size; //!< size of the shared tessellation cache in bytes
-
- public:
- size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees
- size_t instancing_block_size; //!< instancing opens tree up to average block size of primitives
- float instancing_open_factor; //!< instancing opens tree up to x times the number of instances
- size_t instancing_open_max_depth; //!< maximum open depth for geometries
- size_t instancing_open_max; //!< instancing opens tree to maximally that number of subtrees
-
- public:
- bool ignore_config_files; //!< if true no more config files get parsed
- bool float_exceptions; //!< enable floating point exceptions
- int quality_flags; //!< RTC_BUILD_QUALITY_* value selected by the "quality" option
- int scene_flags; //!< bitwise OR of RTC_SCENE_FLAG_* values selected by the "scene_flags" option
- size_t verbose; //!< verbosity of output
- size_t benchmark; //!< integer value of the "benchmark" option; NOTE(review): its exact effect is not visible in this header
-
- public:
- size_t numThreads; //!< number of threads to use in builders
- size_t numUserThreads; //!< number of user provided threads to use in builders
- bool set_affinity; //!< sets affinity for worker threads
- bool start_threads; //!< true when threads should be started at device creation time
- int enabled_cpu_features; //!< CPU ISA features to use
- int enabled_builder_cpu_features; //!< CPU ISA features to use for builders only
- enum FREQUENCY_LEVEL {
- FREQUENCY_SIMD128,
- FREQUENCY_SIMD256,
- FREQUENCY_SIMD512
- } frequency_level; //!< frequency level the app wants to run on (default is SIMD256)
- bool enable_selockmemoryprivilege; //!< configures the SeLockMemoryPrivilege under Windows to enable huge pages
- bool hugepages; //!< true if huge pages should get used
- bool hugepages_success; //!< status for enabling huge pages
-
- public:
- size_t alloc_main_block_size; //!< main allocation block size (shared between threads)
- int alloc_num_main_slots; //!< number of such shared blocks to be used to allocate
- size_t alloc_thread_block_size; //!< size of thread local allocator block size
- int alloc_single_thread_alloc; //!< in single mode nodes and leaves use same thread local allocator
-
- public:
-
- /*! checks if we can use AVX: the ISA must be enabled and the frequency level must not be SIMD128 */
- bool canUseAVX() {
- return hasISA(AVX) && frequency_level != FREQUENCY_SIMD128;
- }
-
- /*! checks if we can use AVX2: the ISA must be enabled and the frequency level must not be SIMD128 */
- bool canUseAVX2() {
- return hasISA(AVX2) && frequency_level != FREQUENCY_SIMD128;
- }
-
- struct ErrorHandler // NOTE(review): members suggest per-thread RTCError storage; implementation is not in this header
- {
- public:
- ErrorHandler();
- ~ErrorHandler();
- RTCError* error(); // presumably returns the calling thread's error slot — TODO confirm against the implementation
-
- public:
- tls_t thread_error;
- std::vector<RTCError*> thread_errors;
- MutexSys errors_mutex;
- };
- ErrorHandler errorHandler; // one handler per State object
- static ErrorHandler g_errorHandler; // static member: a single handler shared by all State objects
-
- public:
- void setErrorFunction(RTCErrorFunction fptr, void* uptr) // only stores the callback and its user pointer
- {
- error_function = fptr;
- error_function_userptr = uptr;
- }
-
- RTCErrorFunction error_function;
- void* error_function_userptr;
-
- public:
- void setMemoryMonitorFunction(RTCMemoryMonitorFunction fptr, void* uptr) // only stores the callback and its user pointer
- {
- memory_monitor_function = fptr;
- memory_monitor_userptr = uptr;
- }
-
- RTCMemoryMonitorFunction memory_monitor_function;
- void* memory_monitor_userptr;
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/common/vector.h b/thirdparty/embree-aarch64/kernels/common/vector.h
deleted file mode 100644
index b478762240..0000000000
--- a/thirdparty/embree-aarch64/kernels/common/vector.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "default.h"
-
-namespace embree
-{
- /*! interface for reporting memory usage changes: aligned_monitored_allocator passes the signed byte delta, with post=false before an allocation and post=true after a deallocation */
- struct MemoryMonitorInterface { // NOTE(review): polymorphic but has no virtual destructor; safe only if never deleted through this type — confirm
- virtual void memoryMonitor(ssize_t bytes, bool post) = 0;
- };
-
- /*! allocator that performs aligned allocations and reports every size change to a MemoryMonitorInterface */
- template<typename T, size_t alignment = 64>
- struct aligned_monitored_allocator
- {
- typedef T value_type;
- typedef T* pointer;
- typedef const T* const_pointer;
- typedef T& reference;
- typedef const T& const_reference;
- typedef std::size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- __forceinline aligned_monitored_allocator(MemoryMonitorInterface* device)
- : device(device), hugepages(false) {} // hugepages starts false and no member of this struct changes it
-
- __forceinline pointer allocate( size_type n ) // allocates storage for n objects of type T; the monitor is told before the memory is obtained
- {
- if (n) {
- assert(device);
- device->memoryMonitor(n*sizeof(T),false); // positive byte count, post=false
- }
- if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M) // large requests go to os_malloc; deallocate() uses the same threshold
- {
- pointer p = (pointer) os_malloc(n*sizeof(value_type),hugepages);
- assert(p);
- return p;
- }
- return (pointer) alignedMalloc(n*sizeof(value_type),alignment); // also reached for n == 0
- }
-
- __forceinline void deallocate( pointer p, size_type n ) // n must be the count passed to allocate(), since it selects the matching free routine
- {
- if (p)
- {
- if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M) // same threshold as allocate(): memory from os_malloc goes back through os_free
- os_free(p,n*sizeof(value_type),hugepages);
- else
- alignedFree(p);
- }
- else assert(n == 0); // a null pointer is only expected together with a zero count
-
- if (n) {
- assert(device);
- device->memoryMonitor(-ssize_t(n)*sizeof(T),true); // NOTE(review): the product is evaluated as unsigned size_t and wraps; the negative value only reappears on conversion to the ssize_t parameter. -ssize_t(n*sizeof(T)) would avoid relying on that
- }
- }
-
- __forceinline void construct( pointer p, const_reference val ) {
- new (p) T(val); // placement-new copy construction into already allocated storage
- }
-
- __forceinline void destroy( pointer p ) {
- p->~T(); // runs the destructor only; memory is released by deallocate()
- }
-
- private:
- MemoryMonitorInterface* device; // monitor that receives the byte deltas; asserted non-null whenever n != 0
- bool hugepages; // forwarded to os_malloc / os_free for large blocks
- };
-
- /*! monitored vector: vector_t using aligned_monitored_allocator with std::alignment_of<T> as alignment instead of the allocator's default of 64 */
- template<typename T>
- using mvector = vector_t<T,aligned_monitored_allocator<T,std::alignment_of<T>::value> >;
-}