diff options
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/common')
46 files changed, 0 insertions, 15110 deletions
diff --git a/thirdparty/embree-aarch64/kernels/common/accel.h b/thirdparty/embree-aarch64/kernels/common/accel.h deleted file mode 100644 index c038d3cf21..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/accel.h +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "ray.h" -#include "point_query.h" -#include "context.h" - -namespace embree -{ - class Scene; - - /*! Base class for the acceleration structure data. */ - class AccelData : public RefCount - { - ALIGNED_CLASS_(16); - public: - enum Type { TY_UNKNOWN = 0, TY_ACCELN = 1, TY_ACCEL_INSTANCE = 2, TY_BVH4 = 3, TY_BVH8 = 4 }; - - public: - AccelData (const Type type) - : bounds(empty), type(type) {} - - /*! notifies the acceleration structure about the deletion of some geometry */ - virtual void deleteGeometry(size_t geomID) {}; - - /*! clears the acceleration structure data */ - virtual void clear() = 0; - - /*! returns normal bounds */ - __forceinline BBox3fa getBounds() const { - return bounds.bounds(); - } - - /*! returns bounds for some time */ - __forceinline BBox3fa getBounds(float t) const { - return bounds.interpolate(t); - } - - /*! returns linear bounds */ - __forceinline LBBox3fa getLinearBounds() const { - return bounds; - } - - /*! checks if acceleration structure is empty */ - __forceinline bool isEmpty() const { - return bounds.bounds0.lower.x == float(pos_inf); - } - - public: - LBBox3fa bounds; // linear bounds - Type type; - }; - - /*! Base class for all intersectable and buildable acceleration structures. */ - class Accel : public AccelData - { - ALIGNED_CLASS_(16); - public: - - struct Intersectors; - - /*! Type of collide function */ - typedef void (*CollideFunc)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr); - - /*! Type of point query function */ - typedef bool(*PointQueryFunc)(Intersectors* This, /*!< this pointer to accel */ - PointQuery* query, /*!< point query for lookup */ - PointQueryContext* context); /*!< point query context */ - - /*! Type of intersect function pointer for single rays. */ - typedef void (*IntersectFunc)(Intersectors* This, /*!< this pointer to accel */ - RTCRayHit& ray, /*!< ray to intersect */ - IntersectContext* context); - - /*! Type of intersect function pointer for ray packets of size 4. */ - typedef void (*IntersectFunc4)(const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRayHit4& ray, /*!< ray packet to intersect */ - IntersectContext* context); - - /*! Type of intersect function pointer for ray packets of size 8. */ - typedef void (*IntersectFunc8)(const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRayHit8& ray, /*!< ray packet to intersect */ - IntersectContext* context); - - /*! Type of intersect function pointer for ray packets of size 16. */ - typedef void (*IntersectFunc16)(const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRayHit16& ray, /*!< ray packet to intersect */ - IntersectContext* context); - - /*! Type of intersect function pointer for ray packets of size N. */ - typedef void (*IntersectFuncN)(Intersectors* This, /*!< this pointer to accel */ - RTCRayHitN** ray, /*!< ray stream to intersect */ - const size_t N, /*!< number of rays in stream */ - IntersectContext* context /*!< layout flags */); - - - /*! Type of occlusion function pointer for single rays. */ - typedef void (*OccludedFunc) (Intersectors* This, /*!< this pointer to accel */ - RTCRay& ray, /*!< ray to test occlusion */ - IntersectContext* context); - - /*! Type of occlusion function pointer for ray packets of size 4. */ - typedef void (*OccludedFunc4) (const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRay4& ray, /*!< ray packet to test occlusion. */ - IntersectContext* context); - - /*! Type of occlusion function pointer for ray packets of size 8. */ - typedef void (*OccludedFunc8) (const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRay8& ray, /*!< ray packet to test occlusion. */ - IntersectContext* context); - - /*! Type of occlusion function pointer for ray packets of size 16. */ - typedef void (*OccludedFunc16) (const void* valid, /*!< pointer to valid mask */ - Intersectors* This, /*!< this pointer to accel */ - RTCRay16& ray, /*!< ray packet to test occlusion. */ - IntersectContext* context); - - /*! Type of intersect function pointer for ray packets of size N. */ - typedef void (*OccludedFuncN)(Intersectors* This, /*!< this pointer to accel */ - RTCRayN** ray, /*!< ray stream to test occlusion */ - const size_t N, /*!< number of rays in stream */ - IntersectContext* context /*!< layout flags */); - typedef void (*ErrorFunc) (); - - struct Collider - { - Collider (ErrorFunc error = nullptr) - : collide((CollideFunc)error), name(nullptr) {} - - Collider (CollideFunc collide, const char* name) - : collide(collide), name(name) {} - - operator bool() const { return name; } - - public: - CollideFunc collide; - const char* name; - }; - - struct Intersector1 - { - Intersector1 (ErrorFunc error = nullptr) - : intersect((IntersectFunc)error), occluded((OccludedFunc)error), name(nullptr) {} - - Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name) - : intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {} - - Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name) - : intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {} - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFunc intersect; - OccludedFunc occluded; - PointQueryFunc pointQuery; - const char* name; - }; - - struct Intersector4 - { - Intersector4 (ErrorFunc error = nullptr) - : intersect((IntersectFunc4)error), occluded((OccludedFunc4)error), name(nullptr) {} - - Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name) - : intersect(intersect), occluded(occluded), name(name) {} - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFunc4 intersect; - OccludedFunc4 occluded; - const char* name; - }; - - struct Intersector8 - { - Intersector8 (ErrorFunc error = nullptr) - : intersect((IntersectFunc8)error), occluded((OccludedFunc8)error), name(nullptr) {} - - Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name) - : intersect(intersect), occluded(occluded), name(name) {} - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFunc8 intersect; - OccludedFunc8 occluded; - const char* name; - }; - - struct Intersector16 - { - Intersector16 (ErrorFunc error = nullptr) - : intersect((IntersectFunc16)error), occluded((OccludedFunc16)error), name(nullptr) {} - - Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name) - : intersect(intersect), occluded(occluded), name(name) {} - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFunc16 intersect; - OccludedFunc16 occluded; - const char* name; - }; - - struct IntersectorN - { - IntersectorN (ErrorFunc error = nullptr) - : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {} - - IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name) - : intersect(intersect), occluded(occluded), name(name) {} - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFuncN intersect; - OccludedFuncN occluded; - const char* name; - }; - - struct Intersectors - { - Intersectors() - : ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr), intersectorN(nullptr) {} - - Intersectors (ErrorFunc error) - : ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error), intersectorN(error) {} - - void print(size_t ident) - { - if (collider.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "collider = " << collider.name << std::endl; - } - if (intersector1.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "intersector1 = " << intersector1.name << std::endl; - } - if (intersector4.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "intersector4 = " << intersector4.name << std::endl; - } - if (intersector8.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "intersector8 = " << intersector8.name << std::endl; - } - if (intersector16.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "intersector16 = " << intersector16.name << std::endl; - } - if (intersectorN.name) { - for (size_t i=0; i<ident; i++) std::cout << " "; - std::cout << "intersectorN = " << intersectorN.name << std::endl; - } - } - - void select(bool filter) - { - if (intersector4_filter) { - if (filter) intersector4 = intersector4_filter; - else intersector4 = intersector4_nofilter; - } - if (intersector8_filter) { - if (filter) intersector8 = intersector8_filter; - else intersector8 = intersector8_nofilter; - } - if (intersector16_filter) { - if (filter) intersector16 = intersector16_filter; - else intersector16 = intersector16_nofilter; - } - if (intersectorN_filter) { - if (filter) intersectorN = intersectorN_filter; - else intersectorN = intersectorN_nofilter; - } - } - - __forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) { - assert(intersector1.pointQuery); - return intersector1.pointQuery(this,query,context); - } - - /*! collides two scenes */ - __forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) { - assert(collider.collide); - collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr); - } - - /*! Intersects a single ray with the scene. */ - __forceinline void intersect (RTCRayHit& ray, IntersectContext* context) { - assert(intersector1.intersect); - intersector1.intersect(this,ray,context); - } - - /*! Intersects a packet of 4 rays with the scene. */ - __forceinline void intersect4 (const void* valid, RTCRayHit4& ray, IntersectContext* context) { - assert(intersector4.intersect); - intersector4.intersect(valid,this,ray,context); - } - - /*! Intersects a packet of 8 rays with the scene. */ - __forceinline void intersect8 (const void* valid, RTCRayHit8& ray, IntersectContext* context) { - assert(intersector8.intersect); - intersector8.intersect(valid,this,ray,context); - } - - /*! Intersects a packet of 16 rays with the scene. */ - __forceinline void intersect16 (const void* valid, RTCRayHit16& ray, IntersectContext* context) { - assert(intersector16.intersect); - intersector16.intersect(valid,this,ray,context); - } - - /*! Intersects a stream of N rays in SOA layout with the scene. */ - __forceinline void intersectN (RTCRayHitN** rayN, const size_t N, IntersectContext* context) - { - assert(intersectorN.intersect); - intersectorN.intersect(this,rayN,N,context); - } - -#if defined(__SSE__) || defined(__ARM_NEON) - __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) { - const vint<4> mask = valid.mask32(); - intersect4(&mask,(RTCRayHit4&)ray,context); - } -#endif -#if defined(__AVX__) - __forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, IntersectContext* context) { - const vint<8> mask = valid.mask32(); - intersect8(&mask,(RTCRayHit8&)ray,context); - } -#endif -#if defined(__AVX512F__) - __forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, IntersectContext* context) { - const vint<16> mask = valid.mask32(); - intersect16(&mask,(RTCRayHit16&)ray,context); - } -#endif - - template<int K> - __forceinline void intersectN (RayHitK<K>** rayN, const size_t N, IntersectContext* context) - { - intersectN((RTCRayHitN**)rayN,N,context); - } - - /*! Tests if single ray is occluded by the scene. */ - __forceinline void occluded (RTCRay& ray, IntersectContext* context) { - assert(intersector1.occluded); - intersector1.occluded(this,ray,context); - } - - /*! Tests if a packet of 4 rays is occluded by the scene. */ - __forceinline void occluded4 (const void* valid, RTCRay4& ray, IntersectContext* context) { - assert(intersector4.occluded); - intersector4.occluded(valid,this,ray,context); - } - - /*! Tests if a packet of 8 rays is occluded by the scene. */ - __forceinline void occluded8 (const void* valid, RTCRay8& ray, IntersectContext* context) { - assert(intersector8.occluded); - intersector8.occluded(valid,this,ray,context); - } - - /*! Tests if a packet of 16 rays is occluded by the scene. */ - __forceinline void occluded16 (const void* valid, RTCRay16& ray, IntersectContext* context) { - assert(intersector16.occluded); - intersector16.occluded(valid,this,ray,context); - } - - /*! Tests if a stream of N rays in SOA layout is occluded by the scene. */ - __forceinline void occludedN (RTCRayN** rayN, const size_t N, IntersectContext* context) - { - assert(intersectorN.occluded); - intersectorN.occluded(this,rayN,N,context); - } - -#if defined(__SSE__) || defined(__ARM_NEON) - __forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) { - const vint<4> mask = valid.mask32(); - occluded4(&mask,(RTCRay4&)ray,context); - } -#endif -#if defined(__AVX__) - __forceinline void occluded(const vbool8& valid, RayK<8>& ray, IntersectContext* context) { - const vint<8> mask = valid.mask32(); - occluded8(&mask,(RTCRay8&)ray,context); - } -#endif -#if defined(__AVX512F__) - __forceinline void occluded(const vbool16& valid, RayK<16>& ray, IntersectContext* context) { - const vint<16> mask = valid.mask32(); - occluded16(&mask,(RTCRay16&)ray,context); - } -#endif - - template<int K> - __forceinline void occludedN (RayK<K>** rayN, const size_t N, IntersectContext* context) - { - occludedN((RTCRayN**)rayN,N,context); - } - - /*! Tests if single ray is occluded by the scene. */ - __forceinline void intersect(RTCRay& ray, IntersectContext* context) { - occluded(ray, context); - } - - /*! Tests if a packet of K rays is occluded by the scene. */ - template<int K> - __forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, IntersectContext* context) { - occluded(valid, ray, context); - } - - /*! Tests if a packet of N rays in SOA layout is occluded by the scene. */ - template<int K> - __forceinline void intersectN(RayK<K>** rayN, const size_t N, IntersectContext* context) { - occludedN(rayN, N, context); - } - - public: - AccelData* ptr; - void* leafIntersector; - Collider collider; - Intersector1 intersector1; - Intersector4 intersector4; - Intersector4 intersector4_filter; - Intersector4 intersector4_nofilter; - Intersector8 intersector8; - Intersector8 intersector8_filter; - Intersector8 intersector8_nofilter; - Intersector16 intersector16; - Intersector16 intersector16_filter; - Intersector16 intersector16_nofilter; - IntersectorN intersectorN; - IntersectorN intersectorN_filter; - IntersectorN intersectorN_nofilter; - }; - - public: - - /*! Construction */ - Accel (const AccelData::Type type) - : AccelData(type) {} - - /*! Construction */ - Accel (const AccelData::Type type, const Intersectors& intersectors) - : AccelData(type), intersectors(intersectors) {} - - /*! Virtual destructor */ - virtual ~Accel() {} - - /*! makes the acceleration structure immutable */ - virtual void immutable () {} - - /*! build acceleration structure */ - virtual void build () = 0; - - public: - Intersectors intersectors; - }; - -#define DEFINE_COLLIDER(symbol,collider) \ - Accel::Collider symbol() { \ - return Accel::Collider((Accel::CollideFunc)collider::collide, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - -#define DEFINE_INTERSECTOR1(symbol,intersector) \ - Accel::Intersector1 symbol() { \ - return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \ - (Accel::OccludedFunc )intersector::occluded, \ - (Accel::PointQueryFunc)intersector::pointQuery,\ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - -#define DEFINE_INTERSECTOR4(symbol,intersector) \ - Accel::Intersector4 symbol() { \ - return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \ - (Accel::OccludedFunc4)intersector::occluded, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - -#define DEFINE_INTERSECTOR8(symbol,intersector) \ - Accel::Intersector8 symbol() { \ - return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \ - (Accel::OccludedFunc8)intersector::occluded, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - -#define DEFINE_INTERSECTOR16(symbol,intersector) \ - Accel::Intersector16 symbol() { \ - return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \ - (Accel::OccludedFunc16)intersector::occluded, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - -#define DEFINE_INTERSECTORN(symbol,intersector) \ - Accel::IntersectorN symbol() { \ - return Accel::IntersectorN((Accel::IntersectFuncN)intersector::intersect, \ - (Accel::OccludedFuncN)intersector::occluded, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } - - /* ray stream filter interface */ - typedef void (*intersectStreamAOS_func)(Scene* scene, RTCRayHit* _rayN, const size_t N, const size_t stride, IntersectContext* context); - typedef void (*intersectStreamAOP_func)(Scene* scene, RTCRayHit** _rayN, const size_t N, IntersectContext* context); - typedef void (*intersectStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context); - typedef void (*intersectStreamSOP_func)(Scene* scene, const RTCRayHitNp* rayN, const size_t N, IntersectContext* context); - - typedef void (*occludedStreamAOS_func)(Scene* scene, RTCRay* _rayN, const size_t N, const size_t stride, IntersectContext* context); - typedef void (*occludedStreamAOP_func)(Scene* scene, RTCRay** _rayN, const size_t N, IntersectContext* context); - typedef void (*occludedStreamSOA_func)(Scene* scene, char* rayN, const size_t N, const size_t streams, const size_t stream_offset, IntersectContext* context); - typedef void (*occludedStreamSOP_func)(Scene* scene, const RTCRayNp* rayN, const size_t N, IntersectContext* context); - - struct RayStreamFilterFuncs - { - RayStreamFilterFuncs() - : intersectAOS(nullptr), intersectAOP(nullptr), intersectSOA(nullptr), intersectSOP(nullptr), - occludedAOS(nullptr), occludedAOP(nullptr), occludedSOA(nullptr), occludedSOP(nullptr) {} - - RayStreamFilterFuncs(void (*ptr) ()) - : intersectAOS((intersectStreamAOS_func) ptr), intersectAOP((intersectStreamAOP_func) ptr), intersectSOA((intersectStreamSOA_func) ptr), intersectSOP((intersectStreamSOP_func) ptr), - occludedAOS((occludedStreamAOS_func) ptr), occludedAOP((occludedStreamAOP_func) ptr), occludedSOA((occludedStreamSOA_func) ptr), occludedSOP((occludedStreamSOP_func) ptr) {} - - RayStreamFilterFuncs(intersectStreamAOS_func intersectAOS, intersectStreamAOP_func intersectAOP, intersectStreamSOA_func intersectSOA, intersectStreamSOP_func intersectSOP, - occludedStreamAOS_func occludedAOS, occludedStreamAOP_func occludedAOP, occludedStreamSOA_func occludedSOA, occludedStreamSOP_func occludedSOP) - : intersectAOS(intersectAOS), intersectAOP(intersectAOP), intersectSOA(intersectSOA), intersectSOP(intersectSOP), - occludedAOS(occludedAOS), occludedAOP(occludedAOP), occludedSOA(occludedSOA), occludedSOP(occludedSOP) {} - - public: - intersectStreamAOS_func intersectAOS; - intersectStreamAOP_func intersectAOP; - intersectStreamSOA_func intersectSOA; - intersectStreamSOP_func intersectSOP; - - occludedStreamAOS_func occludedAOS; - occludedStreamAOP_func occludedAOP; - occludedStreamSOA_func occludedSOA; - occludedStreamSOP_func occludedSOP; - }; - - typedef RayStreamFilterFuncs (*RayStreamFilterFuncsType)(); -} diff --git a/thirdparty/embree-aarch64/kernels/common/accelinstance.h b/thirdparty/embree-aarch64/kernels/common/accelinstance.h deleted file mode 100644 index d74b96df3f..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/accelinstance.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "accel.h" -#include "builder.h" - -namespace embree -{ - class AccelInstance : public Accel - { - public: - AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors) - : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {} - - void immutable () { - builder.reset(nullptr); - } - - public: - void build () { - if (builder) builder->build(); - bounds = accel->bounds; - } - - void deleteGeometry(size_t geomID) { - if (accel ) accel->deleteGeometry(geomID); - if (builder) builder->deleteGeometry(geomID); - } - - void clear() { - if (accel) accel->clear(); - if (builder) builder->clear(); - } - - private: - std::unique_ptr<AccelData> accel; - std::unique_ptr<Builder> builder; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.cpp b/thirdparty/embree-aarch64/kernels/common/acceln.cpp deleted file mode 100644 index aadb4a64ef..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/acceln.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "acceln.h" -#include "ray.h" -#include "../../include/embree3/rtcore_ray.h" -#include "../../common/algorithms/parallel_for.h" - -namespace embree -{ - AccelN::AccelN() - : Accel(AccelData::TY_ACCELN), accels() {} - - AccelN::~AccelN() - { - for (size_t i=0; i<accels.size(); i++) - delete accels[i]; - } - - void AccelN::accels_add(Accel* accel) - { - assert(accel); - accels.push_back(accel); - } - - void AccelN::accels_init() - { - for (size_t i=0; i<accels.size(); i++) - delete accels[i]; - - accels.clear(); - } - - bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context) - { - bool changed = false; - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - changed |= This->accels[i]->intersectors.pointQuery(query,context); - return changed; - } - - void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.intersect(ray,context); - } - - void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.intersect4(valid,ray,context); - } - - void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.intersect8(valid,ray,context); - } - - void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.intersect16(valid,ray,context); - } - - void AccelN::intersectN (Accel::Intersectors* This_in, RTCRayHitN** ray, const size_t N, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.intersectN(ray,N,context); - } - - void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) { - if (This->accels[i]->isEmpty()) continue; - This->accels[i]->intersectors.occluded(ray,context); - if (ray.tfar < 0.0f) break; - } - } - - void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) { - if (This->accels[i]->isEmpty()) continue; - This->accels[i]->intersectors.occluded4(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) - vbool4 valid0 = asBool(((vint4*)valid)[0]); - vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); - if (unlikely(none(valid0 & hit0))) break; -#endif - } - } - - void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) { - if (This->accels[i]->isEmpty()) continue; - This->accels[i]->intersectors.occluded8(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA - vbool4 valid0 = asBool(((vint4*)valid)[0]); - vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); - vbool4 valid1 = asBool(((vint4*)valid)[1]); - vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero); - if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break; -#endif - } - } - - void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - for (size_t i=0; i<This->accels.size(); i++) { - if (This->accels[i]->isEmpty()) continue; - This->accels[i]->intersectors.occluded16(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA - vbool4 valid0 = asBool(((vint4*)valid)[0]); - vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); - vbool4 valid1 = asBool(((vint4*)valid)[1]); - vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero); - vbool4 valid2 = asBool(((vint4*)valid)[2]); - vbool4 hit2 = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero); - vbool4 valid3 = asBool(((vint4*)valid)[3]); - vbool4 hit3 = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero); - if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break; -#endif - } - } - - void AccelN::occludedN (Accel::Intersectors* This_in, RTCRayN** ray, const size_t N, IntersectContext* context) - { - AccelN* This = (AccelN*)This_in->ptr; - size_t M = N; - for (size_t i=0; i<This->accels.size(); i++) - if (!This->accels[i]->isEmpty()) - This->accels[i]->intersectors.occludedN(ray,M,context); - } - - void AccelN::accels_print(size_t ident) - { - for (size_t i=0; i<accels.size(); i++) - { - for (size_t j=0; j<ident; j++) std::cout << " "; - std::cout << "accels[" << i << "]" << std::endl; - accels[i]->intersectors.print(ident+2); - } - } - - void AccelN::accels_immutable() - { - for (size_t i=0; i<accels.size(); i++) - accels[i]->immutable(); - } - - void AccelN::accels_build () - { - /* reduce memory consumption */ - accels.shrink_to_fit(); - - /* build all acceleration structures in parallel */ - parallel_for (accels.size(), [&] (size_t i) { - accels[i]->build(); - }); - - /* create list of non-empty acceleration structures */ - bool valid1 = true; - bool valid4 = true; - bool valid8 = true; - bool valid16 = true; - for (size_t i=0; i<accels.size(); i++) { - valid1 &= (bool) accels[i]->intersectors.intersector1; - valid4 &= (bool) accels[i]->intersectors.intersector4; - valid8 &= (bool) accels[i]->intersectors.intersector8; - valid16 &= (bool) accels[i]->intersectors.intersector16; - } - - if (accels.size() == 1) { - type = accels[0]->type; // FIXME: should just assign entire Accel - bounds = accels[0]->bounds; - intersectors = accels[0]->intersectors; - } - else - { - type = AccelData::TY_ACCELN; - intersectors.ptr = this; - intersectors.intersector1 = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr); - intersectors.intersector4 = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr); - intersectors.intersector8 = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr); - intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr); - intersectors.intersectorN = IntersectorN(&intersectN,&occludedN,"AccelN::intersectorN"); - - /*! calculate bounds */ - bounds = empty; - for (size_t i=0; i<accels.size(); i++) - bounds.extend(accels[i]->bounds); - } - } - - void AccelN::accels_select(bool filter) - { - for (size_t i=0; i<accels.size(); i++) - accels[i]->intersectors.select(filter); - } - - void AccelN::accels_deleteGeometry(size_t geomID) - { - for (size_t i=0; i<accels.size(); i++) - accels[i]->deleteGeometry(geomID); - } - - void AccelN::accels_clear() - { - for (size_t i=0; i<accels.size(); i++) { - accels[i]->clear(); - } - } -} - diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.h b/thirdparty/embree-aarch64/kernels/common/acceln.h deleted file mode 100644 index 2edd98f647..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/acceln.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "accel.h" - -namespace embree -{ - /*! merges N acceleration structures together, by processing them in order */ - class AccelN : public Accel - { - public: - AccelN (); - ~AccelN(); - - public: - void accels_add(Accel* accel); - void accels_init(); - - public: - static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context); - - public: - static void intersect (Accel::Intersectors* This, RTCRayHit& ray, IntersectContext* context); - static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, IntersectContext* context); - static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, IntersectContext* context); - static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, IntersectContext* context); - static void intersectN (Accel::Intersectors* This, RTCRayHitN** ray, const size_t N, IntersectContext* context); - - public: - static void occluded (Accel::Intersectors* This, RTCRay& ray, IntersectContext* context); - static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, IntersectContext* context); - static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, IntersectContext* context); - static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, IntersectContext* context); - static void occludedN (Accel::Intersectors* This, RTCRayN** ray, const size_t N, IntersectContext* context); - - public: - void accels_print(size_t ident); - void accels_immutable(); - void accels_build (); - void accels_select(bool filter); - void accels_deleteGeometry(size_t geomID); - void accels_clear (); - - public: - std::vector<Accel*> accels; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.cpp b/thirdparty/embree-aarch64/kernels/common/accelset.cpp deleted file mode 100644 index 79be1c4301..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/accelset.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "accelset.h" -#include "scene.h" - -namespace embree -{ - AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps) - : Geometry(device,gtype,(unsigned int)numItems,(unsigned int)numTimeSteps), boundsFunc(nullptr) {} - - AccelSet::IntersectorN::IntersectorN (ErrorFunc error) - : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {} - - AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name) - : intersect(intersect), occluded(occluded), name(name) {} -} diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.h b/thirdparty/embree-aarch64/kernels/common/accelset.h deleted file mode 100644 index 3774b2accb..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/accelset.h +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "builder.h" -#include "geometry.h" -#include "ray.h" -#include "hit.h" - -namespace embree -{ - struct IntersectFunctionNArguments; - struct OccludedFunctionNArguments; - - typedef void (*ReportIntersectionFunc) (IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args); - typedef void (*ReportOcclusionFunc) (OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args); - - struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments - { - IntersectContext* internal_context; - Geometry* geometry; - ReportIntersectionFunc report; - }; - - struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments - { - IntersectContext* internal_context; - Geometry* geometry; - ReportOcclusionFunc report; - }; - - /*! Base class for set of acceleration structures. */ - class AccelSet : public Geometry - { - public: - typedef RTCIntersectFunctionN IntersectFuncN; - typedef RTCOccludedFunctionN OccludedFuncN; - typedef void (*ErrorFunc) (); - - struct IntersectorN - { - IntersectorN (ErrorFunc error = nullptr) ; - IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name); - - operator bool() const { return name; } - - public: - static const char* type; - IntersectFuncN intersect; - OccludedFuncN occluded; - const char* name; - }; - - public: - - /*! construction */ - AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps); - - /*! makes the acceleration structure immutable */ - virtual void immutable () {} - - /*! build accel */ - virtual void build () = 0; - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - if (!isvalid_non_empty(bounds(i,itime))) return false; - - return true; - } - - /*! Calculates the bounds of an item */ - __forceinline BBox3fa bounds(size_t i, size_t itime = 0) const - { - BBox3fa box; - assert(i < size()); - RTCBoundsFunctionArguments args; - args.geometryUserPtr = userPtr; - args.primID = (unsigned int)i; - args.timeStep = (unsigned int)itime; - args.bounds_o = (RTCBounds*)&box; - boundsFunc(&args); - return box; - } - - /*! calculates the linear bounds of the i'th item at the itime'th time segment */ - __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const - { - BBox3fa box[2]; - assert(i < size()); - RTCBoundsFunctionArguments args; - args.geometryUserPtr = userPtr; - args.primID = (unsigned int)i; - args.timeStep = (unsigned int)(itime+0); - args.bounds_o = (RTCBounds*)&box[0]; - boundsFunc(&args); - args.timeStep = (unsigned int)(itime+1); - args.bounds_o = (RTCBounds*)&box[1]; - boundsFunc(&args); - return LBBox3fa(box[0],box[1]); - } - - /*! calculates the build bounds of the i'th item, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const - { - const BBox3fa b = bounds(i); - if (bbox) *bbox = b; - return isvalid_non_empty(b); - } - - /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - const LBBox3fa bounds = linearBounds(i,itime); - bbox = bounds.bounds0; // use bounding box of first timestep to build BVH - return isvalid_non_empty(bounds); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const { - if (!valid(i, timeSegmentRange(time_range))) return false; - bbox = linearBounds(i, time_range); - return true; - } - - /* gets version info of topology */ - unsigned int getTopologyVersion() const { - return numPrimitives; - } - - /* returns true if topology changed */ - bool topologyChanged(unsigned int otherVersion) const { - return numPrimitives != otherVersion; - } - - public: - - /*! Intersects a single ray with the scene. */ - __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report) - { - assert(primID < size()); - assert(intersectorN.intersect); - - int mask = -1; - IntersectFunctionNArguments args; - args.valid = &mask; - args.geometryUserPtr = userPtr; - args.context = context->user; - args.rayhit = (RTCRayHitN*)&ray; - args.N = 1; - args.geomID = geomID; - args.primID = primID; - args.internal_context = context; - args.geometry = this; - args.report = report; - - intersectorN.intersect(&args); - } - - /*! Tests if single ray is occluded by the scene. */ - __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report) - { - assert(primID < size()); - assert(intersectorN.occluded); - - int mask = -1; - OccludedFunctionNArguments args; - args.valid = &mask; - args.geometryUserPtr = userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.N = 1; - args.geomID = geomID; - args.primID = primID; - args.internal_context = context; - args.geometry = this; - args.report = report; - - intersectorN.occluded(&args); - } - - /*! Intersects a packet of K rays with the scene. */ - template<int K> - __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report) - { - assert(primID < size()); - assert(intersectorN.intersect); - - vint<K> mask = valid.mask32(); - IntersectFunctionNArguments args; - args.valid = (int*)&mask; - args.geometryUserPtr = userPtr; - args.context = context->user; - args.rayhit = (RTCRayHitN*)&ray; - args.N = K; - args.geomID = geomID; - args.primID = primID; - args.internal_context = context; - args.geometry = this; - args.report = report; - - intersectorN.intersect(&args); - } - - /*! Tests if a packet of K rays is occluded by the scene. */ - template<int K> - __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report) - { - assert(primID < size()); - assert(intersectorN.occluded); - - vint<K> mask = valid.mask32(); - OccludedFunctionNArguments args; - args.valid = (int*)&mask; - args.geometryUserPtr = userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.N = K; - args.geomID = geomID; - args.primID = primID; - args.internal_context = context; - args.geometry = this; - args.report = report; - - intersectorN.occluded(&args); - } - - public: - RTCBoundsFunction boundsFunc; - IntersectorN intersectorN; - }; - -#define DEFINE_SET_INTERSECTORN(symbol,intersector) \ - AccelSet::IntersectorN symbol() { \ - return AccelSet::IntersectorN(intersector::intersect, \ - intersector::occluded, \ - TOSTRING(isa) "::" TOSTRING(symbol)); \ - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.cpp b/thirdparty/embree-aarch64/kernels/common/alloc.cpp deleted file mode 100644 index 6fa406f03a..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/alloc.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "alloc.h" -#include "../../common/sys/thread.h" -#if defined(__aarch64__) && defined(BUILD_IOS) -#include "../../common/sys/barrier.h" -#endif - -namespace embree -{ - __thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr; - SpinLock FastAllocator::s_thread_local_allocators_lock; - std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators; - - struct fast_allocator_regression_test : public RegressionTest - { - BarrierSys barrier; - std::atomic<size_t> numFailed; - std::unique_ptr<FastAllocator> alloc; - - fast_allocator_regression_test() - : RegressionTest("fast_allocator_regression_test"), numFailed(0) - { - registerRegressionTest(this); - } - - static void thread_alloc(fast_allocator_regression_test* This) - { - FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator(); - - size_t* ptrs[1000]; - for (size_t j=0; j<1000; j++) - { - This->barrier.wait(); - for (size_t i=0; i<1000; i++) { - ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32)); - *ptrs[i] = size_t(threadalloc.talloc0) + i; - } - for (size_t i=0; i<1000; i++) { - if (*ptrs[i] != size_t(threadalloc.talloc0) + i) - This->numFailed++; - } - This->barrier.wait(); - } - } - - bool run () - { - alloc = make_unique(new FastAllocator(nullptr,false)); - numFailed.store(0); - - size_t numThreads = getNumberOfLogicalThreads(); - barrier.init(numThreads+1); - - /* create threads */ - std::vector<thread_t> threads; - for (size_t i=0; i<numThreads; i++) - threads.push_back(createThread((thread_func)thread_alloc,this)); - - /* run test */ - for (size_t i=0; i<1000; i++) - { - alloc->reset(); - barrier.wait(); - barrier.wait(); - } - - /* destroy threads */ - for (size_t i=0; i<numThreads; i++) - join(threads[i]); - - alloc = nullptr; - - return numFailed == 0; - } - }; - - fast_allocator_regression_test fast_allocator_regression; -} - - diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.h b/thirdparty/embree-aarch64/kernels/common/alloc.h deleted file mode 100644 index 488fa707ef..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/alloc.h +++ /dev/null @@ -1,1006 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "device.h" -#include "scene.h" -#include "primref.h" - -#if defined(__aarch64__) && defined(BUILD_IOS) -#include <mutex> -#endif - -namespace embree -{ - class FastAllocator - { - /*! maximum supported alignment */ - static const size_t maxAlignment = 64; - - /*! maximum allocation size */ - - /* default settings */ - //static const size_t defaultBlockSize = 4096; -#define maxAllocationSize size_t(2*1024*1024-maxAlignment) - - static const size_t MAX_THREAD_USED_BLOCK_SLOTS = 8; - - public: - - struct ThreadLocal2; - enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE }; - - /*! Per thread structure holding the current memory block. */ - struct __aligned(64) ThreadLocal - { - ALIGNED_CLASS_(64); - public: - - /*! Constructor for usage with ThreadLocalData */ - __forceinline ThreadLocal (ThreadLocal2* parent) - : parent(parent), ptr(nullptr), cur(0), end(0), allocBlockSize(0), bytesUsed(0), bytesWasted(0) {} - - /*! initialize allocator */ - void init(FastAllocator* alloc) - { - ptr = nullptr; - cur = end = 0; - bytesUsed = 0; - bytesWasted = 0; - allocBlockSize = 0; - if (alloc) allocBlockSize = alloc->defaultBlockSize; - } - - /* Allocate aligned memory from the threads memory block. */ - __forceinline void* malloc(FastAllocator* alloc, size_t bytes, size_t align = 16) - { - /* bind the thread local allocator to the proper FastAllocator*/ - parent->bind(alloc); - - assert(align <= maxAlignment); - bytesUsed += bytes; - - /* try to allocate in local block */ - size_t ofs = (align - cur) & (align-1); - cur += bytes + ofs; - if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; } - cur -= bytes + ofs; - - /* if allocation is too large allocate with parent allocator */ - if (4*bytes > allocBlockSize) { - return alloc->malloc(bytes,maxAlignment,false); - } - - /* get new partial block if allocation failed */ - size_t blockSize = allocBlockSize; - ptr = (char*) alloc->malloc(blockSize,maxAlignment,true); - bytesWasted += end-cur; - cur = 0; end = blockSize; - - /* retry allocation */ - ofs = (align - cur) & (align-1); - cur += bytes + ofs; - if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; } - cur -= bytes + ofs; - - /* get new full block if allocation failed */ - blockSize = allocBlockSize; - ptr = (char*) alloc->malloc(blockSize,maxAlignment,false); - bytesWasted += end-cur; - cur = 0; end = blockSize; - - /* retry allocation */ - ofs = (align - cur) & (align-1); - cur += bytes + ofs; - if (likely(cur <= end)) { bytesWasted += ofs; return &ptr[cur - bytes]; } - cur -= bytes + ofs; - - /* should never happen as large allocations get handled specially above */ - assert(false); - return nullptr; - } - - - /*! returns amount of used bytes */ - __forceinline size_t getUsedBytes() const { return bytesUsed; } - - /*! returns amount of free bytes */ - __forceinline size_t getFreeBytes() const { return end-cur; } - - /*! returns amount of wasted bytes */ - __forceinline size_t getWastedBytes() const { return bytesWasted; } - - private: - ThreadLocal2* parent; - char* ptr; //!< pointer to memory block - size_t cur; //!< current location of the allocator - size_t end; //!< end of the memory block - size_t allocBlockSize; //!< block size for allocations - size_t bytesUsed; //!< number of total bytes allocated - size_t bytesWasted; //!< number of bytes wasted - }; - - /*! Two thread local structures. */ - struct __aligned(64) ThreadLocal2 - { - ALIGNED_CLASS_(64); - public: - - __forceinline ThreadLocal2() - : alloc(nullptr), alloc0(this), alloc1(this) {} - - /*! bind to fast allocator */ - __forceinline void bind(FastAllocator* alloc_i) - { - assert(alloc_i); - if (alloc.load() == alloc_i) return; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else - Lock<SpinLock> lock(mutex); -#endif - //if (alloc.load() == alloc_i) return; // not required as only one thread calls bind - if (alloc.load()) { - alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); - alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes(); - alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes(); - } - alloc0.init(alloc_i); - alloc1.init(alloc_i); - alloc.store(alloc_i); - alloc_i->join(this); - } - - /*! unbind to fast allocator */ - void unbind(FastAllocator* alloc_i) - { - assert(alloc_i); - if (alloc.load() != alloc_i) return; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else - Lock<SpinLock> lock(mutex); -#endif - if (alloc.load() != alloc_i) return; // required as a different thread calls unbind - alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); - alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes(); - alloc.load()->bytesWasted += alloc0.getWastedBytes() + alloc1.getWastedBytes(); - alloc0.init(nullptr); - alloc1.init(nullptr); - alloc.store(nullptr); - } - - public: -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex mutex; -#else - SpinLock mutex; //!< required as unbind is called from other threads -#endif - std::atomic<FastAllocator*> alloc; //!< parent allocator - ThreadLocal alloc0; - ThreadLocal alloc1; - }; - - FastAllocator (Device* device, bool osAllocation) - : device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0), - growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC), - primrefarray(device,0) - { - for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) - { - threadUsedBlocks[i] = nullptr; - threadBlocks[i] = nullptr; - assert(!slotMutex[i].isLocked()); - } - } - - ~FastAllocator () { - clear(); - } - - /*! returns the device attached to this allocator */ - Device* getDevice() { - return device; - } - - void share(mvector<PrimRef>& primrefarray_i) { - primrefarray = std::move(primrefarray_i); - } - - void unshare(mvector<PrimRef>& primrefarray_o) - { - reset(); // this removes blocks that are allocated inside the shared primref array - primrefarray_o = std::move(primrefarray); - } - - /*! returns first fast thread local allocator */ - __forceinline ThreadLocal* _threadLocal() { - return &threadLocal2()->alloc0; - } - - void setOSallocation(bool flag) - { - atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC; - } - - private: - - /*! returns both fast thread local allocators */ - __forceinline ThreadLocal2* threadLocal2() - { - ThreadLocal2* alloc = thread_local_allocator2; - if (alloc == nullptr) { - thread_local_allocator2 = alloc = new ThreadLocal2; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(s_thread_local_allocators_lock); -#else - Lock<SpinLock> lock(s_thread_local_allocators_lock); -#endif - s_thread_local_allocators.push_back(make_unique(alloc)); - } - return alloc; - } - - public: - - __forceinline void join(ThreadLocal2* alloc) - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(s_thread_local_allocators_lock); -#else - Lock<SpinLock> lock(thread_local_allocators_lock); -#endif - thread_local_allocators.push_back(alloc); - } - - public: - - struct CachedAllocator - { - __forceinline CachedAllocator(void* ptr) - : alloc(nullptr), talloc0(nullptr), talloc1(nullptr) - { - assert(ptr == nullptr); - } - - __forceinline CachedAllocator(FastAllocator* alloc, ThreadLocal2* talloc) - : alloc(alloc), talloc0(&talloc->alloc0), talloc1(alloc->use_single_mode ? &talloc->alloc0 : &talloc->alloc1) {} - - __forceinline operator bool () const { - return alloc != nullptr; - } - - __forceinline void* operator() (size_t bytes, size_t align = 16) const { - return talloc0->malloc(alloc,bytes,align); - } - - __forceinline void* malloc0 (size_t bytes, size_t align = 16) const { - return talloc0->malloc(alloc,bytes,align); - } - - __forceinline void* malloc1 (size_t bytes, size_t align = 16) const { - return talloc1->malloc(alloc,bytes,align); - } - - public: - FastAllocator* alloc; - ThreadLocal* talloc0; - ThreadLocal* talloc1; - }; - - __forceinline CachedAllocator getCachedAllocator() { - return CachedAllocator(this,threadLocal2()); - } - - /*! Builder interface to create thread local allocator */ - struct Create - { - public: - __forceinline Create (FastAllocator* allocator) : allocator(allocator) {} - __forceinline CachedAllocator operator() () const { return allocator->getCachedAllocator(); } - - private: - FastAllocator* allocator; - }; - - void internal_fix_used_blocks() - { - /* move thread local blocks to global block list */ - for (size_t i = 0; i < MAX_THREAD_USED_BLOCK_SLOTS; i++) - { - while (threadBlocks[i].load() != nullptr) { - Block* nextUsedBlock = threadBlocks[i].load()->next; - threadBlocks[i].load()->next = usedBlocks.load(); - usedBlocks = threadBlocks[i].load(); - threadBlocks[i] = nextUsedBlock; - } - threadBlocks[i] = nullptr; - } - } - - static const size_t threadLocalAllocOverhead = 20; //! 20 means 5% parallel allocation overhead through unfilled thread local blocks -#if defined(__AVX512ER__) // KNL - static const size_t mainAllocOverheadStatic = 15; //! 15 means 7.5% allocation overhead through unfilled main alloc blocks -#else - static const size_t mainAllocOverheadStatic = 20; //! 20 means 5% allocation overhead through unfilled main alloc blocks -#endif - static const size_t mainAllocOverheadDynamic = 8; //! 20 means 12.5% allocation overhead through unfilled main alloc blocks - - /* calculates a single threaded threshold for the builders such - * that for small scenes the overhead of partly allocated blocks - * per thread is low */ - size_t fixSingleThreadThreshold(size_t branchingFactor, size_t defaultThreshold, size_t numPrimitives, size_t bytesEstimated) - { - if (numPrimitives == 0 || bytesEstimated == 0) - return defaultThreshold; - - /* calculate block size in bytes to fulfill threadLocalAllocOverhead constraint */ - const size_t single_mode_factor = use_single_mode ? 1 : 2; - const size_t threadCount = TaskScheduler::threadCount(); - const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSize; - - /* if we do not have to limit number of threads use optimal thresdhold */ - if ( (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount) - return defaultThreshold; - - /* otherwise limit number of threads by calculating proper single thread threshold */ - else { - double bytesPerPrimitive = double(bytesEstimated)/double(numPrimitives); - return size_t(ceil(branchingFactor*singleThreadBytes/bytesPerPrimitive)); - } - } - - __forceinline size_t alignSize(size_t i) { - return (i+127)/128*128; - } - - /*! initializes the grow size */ - __forceinline void initGrowSizeAndNumSlots(size_t bytesEstimated, bool fast) - { - /* we do not need single thread local allocator mode */ - use_single_mode = false; - - /* calculate growSize such that at most mainAllocationOverhead gets wasted when a block stays unused */ - size_t mainAllocOverhead = fast ? mainAllocOverheadDynamic : mainAllocOverheadStatic; - size_t blockSize = alignSize(bytesEstimated/mainAllocOverhead); - growSize = maxGrowSize = clamp(blockSize,size_t(1024),maxAllocationSize); - - /* if we reached the maxAllocationSize for growSize, we can - * increase the number of allocation slots by still guaranteeing - * the mainAllocationOverhead */ - slotMask = 0x0; - - if (MAX_THREAD_USED_BLOCK_SLOTS >= 2 && bytesEstimated > 2*mainAllocOverhead*growSize) slotMask = 0x1; - if (MAX_THREAD_USED_BLOCK_SLOTS >= 4 && bytesEstimated > 4*mainAllocOverhead*growSize) slotMask = 0x3; - if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 8*mainAllocOverhead*growSize) slotMask = 0x7; - if (MAX_THREAD_USED_BLOCK_SLOTS >= 8 && bytesEstimated > 16*mainAllocOverhead*growSize) { growSize *= 2; } /* if the overhead is tiny, double the growSize */ - - /* set the thread local alloc block size */ - size_t defaultBlockSizeSwitch = PAGE_SIZE+maxAlignment; - - /* for sufficiently large scene we can increase the defaultBlockSize over the defaultBlockSizeSwitch size */ -#if 0 // we do not do this as a block size of 4160 if for some reason best for KNL - const size_t threadCount = TaskScheduler::threadCount(); - const size_t single_mode_factor = use_single_mode ? 1 : 2; - const size_t singleThreadBytes = single_mode_factor*threadLocalAllocOverhead*defaultBlockSizeSwitch; - if (bytesEstimated+(singleThreadBytes-1))/singleThreadBytes >= threadCount) - defaultBlockSize = min(max(defaultBlockSizeSwitch,bytesEstimated/(single_mode_factor*threadLocalAllocOverhead*threadCount)),growSize); - - /* otherwise we grow the defaultBlockSize up to defaultBlockSizeSwitch */ - else -#endif - defaultBlockSize = clamp(blockSize,size_t(1024),defaultBlockSizeSwitch); - - if (bytesEstimated == 0) { - maxGrowSize = maxAllocationSize; // special mode if builder cannot estimate tree size - defaultBlockSize = defaultBlockSizeSwitch; - } - log2_grow_size_scale = 0; - - if (device->alloc_main_block_size != 0) growSize = device->alloc_main_block_size; - if (device->alloc_num_main_slots >= 1 ) slotMask = 0x0; - if (device->alloc_num_main_slots >= 2 ) slotMask = 0x1; - if (device->alloc_num_main_slots >= 4 ) slotMask = 0x3; - if (device->alloc_num_main_slots >= 8 ) slotMask = 0x7; - if (device->alloc_thread_block_size != 0) defaultBlockSize = device->alloc_thread_block_size; - if (device->alloc_single_thread_alloc != -1) use_single_mode = device->alloc_single_thread_alloc; - } - - /*! initializes the allocator */ - void init(size_t bytesAllocate, size_t bytesReserve, size_t bytesEstimate) - { - internal_fix_used_blocks(); - /* distribute the allocation to multiple thread block slots */ - slotMask = MAX_THREAD_USED_BLOCK_SLOTS-1; // FIXME: remove - if (usedBlocks.load() || freeBlocks.load()) { reset(); return; } - if (bytesReserve == 0) bytesReserve = bytesAllocate; - freeBlocks = Block::create(device,bytesAllocate,bytesReserve,nullptr,atype); - estimatedSize = bytesEstimate; - initGrowSizeAndNumSlots(bytesEstimate,true); - } - - /*! initializes the allocator */ - void init_estimate(size_t bytesEstimate) - { - internal_fix_used_blocks(); - if (usedBlocks.load() || freeBlocks.load()) { reset(); return; } - /* single allocator mode ? */ - estimatedSize = bytesEstimate; - //initGrowSizeAndNumSlots(bytesEstimate,false); - initGrowSizeAndNumSlots(bytesEstimate,false); - - } - - /*! frees state not required after build */ - __forceinline void cleanup() - { - internal_fix_used_blocks(); - - /* unbind all thread local allocators */ - for (auto alloc : thread_local_allocators) alloc->unbind(this); - thread_local_allocators.clear(); - } - - /*! resets the allocator, memory blocks get reused */ - void reset () - { - internal_fix_used_blocks(); - - bytesUsed.store(0); - bytesFree.store(0); - bytesWasted.store(0); - - /* reset all used blocks and move them to begin of free block list */ - while (usedBlocks.load() != nullptr) { - usedBlocks.load()->reset_block(); - Block* nextUsedBlock = usedBlocks.load()->next; - usedBlocks.load()->next = freeBlocks.load(); - freeBlocks = usedBlocks.load(); - usedBlocks = nextUsedBlock; - } - - /* remove all shared blocks as they are re-added during build */ - freeBlocks.store(Block::remove_shared_blocks(freeBlocks.load())); - - for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) - { - threadUsedBlocks[i] = nullptr; - threadBlocks[i] = nullptr; - } - - /* unbind all thread local allocators */ - for (auto alloc : thread_local_allocators) alloc->unbind(this); - thread_local_allocators.clear(); - } - - /*! frees all allocated memory */ - __forceinline void clear() - { - cleanup(); - bytesUsed.store(0); - bytesFree.store(0); - bytesWasted.store(0); - if (usedBlocks.load() != nullptr) usedBlocks.load()->clear_list(device); usedBlocks = nullptr; - if (freeBlocks.load() != nullptr) freeBlocks.load()->clear_list(device); freeBlocks = nullptr; - for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) { - threadUsedBlocks[i] = nullptr; - threadBlocks[i] = nullptr; - } - primrefarray.clear(); - } - - __forceinline size_t incGrowSizeScale() - { - size_t scale = log2_grow_size_scale.fetch_add(1)+1; - return size_t(1) << min(size_t(16),scale); - } - - /*! thread safe allocation of memory */ - void* malloc(size_t& bytes, size_t align, bool partial) - { - assert(align <= maxAlignment); - - while (true) - { - /* allocate using current block */ - size_t threadID = TaskScheduler::threadID(); - size_t slot = threadID & slotMask; - Block* myUsedBlocks = threadUsedBlocks[slot]; - if (myUsedBlocks) { - void* ptr = myUsedBlocks->malloc(device,bytes,align,partial); - if (ptr) return ptr; - } - - /* throw error if allocation is too large */ - if (bytes > maxAllocationSize) - throw_RTCError(RTC_ERROR_UNKNOWN,"allocation is too large"); - - /* parallel block creation in case of no freeBlocks, avoids single global mutex */ - if (likely(freeBlocks.load() == nullptr)) - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(slotMutex[slot]); -#else - Lock<SpinLock> lock(slotMutex[slot]); -#endif - if (myUsedBlocks == threadUsedBlocks[slot]) { - const size_t alignedBytes = (bytes+(align-1)) & ~(align-1); - const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes); - assert(allocSize >= bytes); - threadBlocks[slot] = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,threadBlocks[slot],atype); // FIXME: a large allocation might throw away a block here! - // FIXME: a direct allocation should allocate inside the block here, and not in the next loop! a different thread could do some allocation and make the large allocation fail. - } - continue; - } - - /* if this fails allocate new block */ - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else - Lock<SpinLock> lock(mutex); -#endif - if (myUsedBlocks == threadUsedBlocks[slot]) - { - if (freeBlocks.load() != nullptr) { - Block* nextFreeBlock = freeBlocks.load()->next; - freeBlocks.load()->next = usedBlocks; - __memory_barrier(); - usedBlocks = freeBlocks.load(); - threadUsedBlocks[slot] = freeBlocks.load(); - freeBlocks = nextFreeBlock; - } else { - const size_t allocSize = min(growSize*incGrowSizeScale(),maxGrowSize); - usedBlocks = threadUsedBlocks[slot] = Block::create(device,allocSize,allocSize,usedBlocks,atype); // FIXME: a large allocation should get delivered directly, like above! - } - } - } - } - } - - /*! add new block */ - void addBlock(void* ptr, ssize_t bytes) - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else - Lock<SpinLock> lock(mutex); -#endif - const size_t sizeof_Header = offsetof(Block,data[0]); - void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1)); - size_t ofs = (size_t) aptr - (size_t) ptr; - bytes -= ofs; - if (bytes < 4096) return; // ignore empty or very small blocks - freeBlocks = new (aptr) Block(SHARED,bytes-sizeof_Header,bytes-sizeof_Header,freeBlocks,ofs); - } - - /* special allocation only used from morton builder only a single time for each build */ - void* specialAlloc(size_t bytes) - { - assert(freeBlocks.load() != nullptr && freeBlocks.load()->getBlockAllocatedBytes() >= bytes); - return freeBlocks.load()->ptr(); - } - - struct Statistics - { - Statistics () - : bytesUsed(0), bytesFree(0), bytesWasted(0) {} - - Statistics (size_t bytesUsed, size_t bytesFree, size_t bytesWasted) - : bytesUsed(bytesUsed), bytesFree(bytesFree), bytesWasted(bytesWasted) {} - - Statistics (FastAllocator* alloc, AllocationType atype, bool huge_pages = false) - : bytesUsed(0), bytesFree(0), bytesWasted(0) - { - Block* usedBlocks = alloc->usedBlocks.load(); - Block* freeBlocks = alloc->freeBlocks.load(); - if (usedBlocks) bytesUsed += usedBlocks->getUsedBytes(atype,huge_pages); - if (freeBlocks) bytesFree += freeBlocks->getAllocatedBytes(atype,huge_pages); - if (usedBlocks) bytesFree += usedBlocks->getFreeBytes(atype,huge_pages); - if (freeBlocks) bytesWasted += freeBlocks->getWastedBytes(atype,huge_pages); - if (usedBlocks) bytesWasted += usedBlocks->getWastedBytes(atype,huge_pages); - } - - std::string str(size_t numPrimitives) - { - std::stringstream str; - str.setf(std::ios::fixed, std::ios::floatfield); - str << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, " - << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, " - << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, " - << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesAllocatedTotal() << " MB, " - << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesAllocatedTotal())/double(numPrimitives); - return str.str(); - } - - friend Statistics operator+ ( const Statistics& a, const Statistics& b) - { - return Statistics(a.bytesUsed+b.bytesUsed, - a.bytesFree+b.bytesFree, - a.bytesWasted+b.bytesWasted); - } - - size_t bytesAllocatedTotal() const { - return bytesUsed + bytesFree + bytesWasted; - } - - public: - size_t bytesUsed; - size_t bytesFree; - size_t bytesWasted; - }; - - Statistics getStatistics(AllocationType atype, bool huge_pages = false) { - return Statistics(this,atype,huge_pages); - } - - size_t getUsedBytes() { - return bytesUsed; - } - - size_t getWastedBytes() { - return bytesWasted; - } - - struct AllStatistics - { - AllStatistics (FastAllocator* alloc) - - : bytesUsed(alloc->bytesUsed), - bytesFree(alloc->bytesFree), - bytesWasted(alloc->bytesWasted), - stat_all(alloc,ANY_TYPE), - stat_malloc(alloc,ALIGNED_MALLOC), - stat_4K(alloc,EMBREE_OS_MALLOC,false), - stat_2M(alloc,EMBREE_OS_MALLOC,true), - stat_shared(alloc,SHARED) {} - - AllStatistics (size_t bytesUsed, - size_t bytesFree, - size_t bytesWasted, - Statistics stat_all, - Statistics stat_malloc, - Statistics stat_4K, - Statistics stat_2M, - Statistics stat_shared) - - : bytesUsed(bytesUsed), - bytesFree(bytesFree), - bytesWasted(bytesWasted), - stat_all(stat_all), - stat_malloc(stat_malloc), - stat_4K(stat_4K), - stat_2M(stat_2M), - stat_shared(stat_shared) {} - - friend AllStatistics operator+ (const AllStatistics& a, const AllStatistics& b) - { - return AllStatistics(a.bytesUsed+b.bytesUsed, - a.bytesFree+b.bytesFree, - a.bytesWasted+b.bytesWasted, - a.stat_all + b.stat_all, - a.stat_malloc + b.stat_malloc, - a.stat_4K + b.stat_4K, - a.stat_2M + b.stat_2M, - a.stat_shared + b.stat_shared); - } - - void print(size_t numPrimitives) - { - std::stringstream str0; - str0.setf(std::ios::fixed, std::ios::floatfield); - str0 << " alloc : " - << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, " - << " " - << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesUsed)/double(numPrimitives); - std::cout << str0.str() << std::endl; - - std::stringstream str1; - str1.setf(std::ios::fixed, std::ios::floatfield); - str1 << " alloc : " - << "used = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesUsed << " MB, " - << "free = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesFree << " MB, " - << "wasted = " << std::setw(7) << std::setprecision(3) << 1E-6f*bytesWasted << " MB, " - << "total = " << std::setw(7) << std::setprecision(3) << 1E-6f*(bytesUsed+bytesFree+bytesWasted) << " MB, " - << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesUsed+bytesFree+bytesWasted)/double(numPrimitives); - std::cout << str1.str() << std::endl; - - std::cout << " total : " << stat_all.str(numPrimitives) << std::endl; - std::cout << " 4K : " << stat_4K.str(numPrimitives) << std::endl; - std::cout << " 2M : " << stat_2M.str(numPrimitives) << std::endl; - std::cout << " malloc: " << stat_malloc.str(numPrimitives) << std::endl; - std::cout << " shared: " << stat_shared.str(numPrimitives) << std::endl; - } - - private: - size_t bytesUsed; - size_t bytesFree; - size_t bytesWasted; - Statistics stat_all; - Statistics stat_malloc; - Statistics stat_4K; - Statistics stat_2M; - Statistics stat_shared; - }; - - void print_blocks() - { - std::cout << " estimatedSize = " << estimatedSize << ", slotMask = " << slotMask << ", use_single_mode = " << use_single_mode << ", maxGrowSize = " << maxGrowSize << ", defaultBlockSize = " << defaultBlockSize << std::endl; - - std::cout << " used blocks = "; - if (usedBlocks.load() != nullptr) usedBlocks.load()->print_list(); - std::cout << "[END]" << std::endl; - - std::cout << " free blocks = "; - if (freeBlocks.load() != nullptr) freeBlocks.load()->print_list(); - std::cout << "[END]" << std::endl; - } - - private: - - struct Block - { - static Block* create(MemoryMonitorInterface* device, size_t bytesAllocate, size_t bytesReserve, Block* next, AllocationType atype) - { - /* We avoid using os_malloc for small blocks as this could - * cause a risk of fragmenting the virtual address space and - * reach the limit of vm.max_map_count = 65k under Linux. */ - if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize) - atype = ALIGNED_MALLOC; - - /* we need to additionally allocate some header */ - const size_t sizeof_Header = offsetof(Block,data[0]); - bytesAllocate = sizeof_Header+bytesAllocate; - bytesReserve = sizeof_Header+bytesReserve; - - /* consume full 4k pages with using os_malloc */ - if (atype == EMBREE_OS_MALLOC) { - bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1)); - bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1)); - } - - /* either use alignedMalloc or os_malloc */ - void *ptr = nullptr; - if (atype == ALIGNED_MALLOC) - { - /* special handling for default block size */ - if (bytesAllocate == (2*PAGE_SIZE_2M)) - { - const size_t alignment = maxAlignment; - if (device) device->memoryMonitor(bytesAllocate+alignment,false); - ptr = alignedMalloc(bytesAllocate,alignment); - - /* give hint to transparently convert these pages to 2MB pages */ - const size_t ptr_aligned_begin = ((size_t)ptr) & ~size_t(PAGE_SIZE_2M-1); - os_advise((void*)(ptr_aligned_begin + 0),PAGE_SIZE_2M); // may fail if no memory mapped before block - os_advise((void*)(ptr_aligned_begin + 1*PAGE_SIZE_2M),PAGE_SIZE_2M); - os_advise((void*)(ptr_aligned_begin + 2*PAGE_SIZE_2M),PAGE_SIZE_2M); // may fail if no memory mapped after block - - return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment); - } - else - { - const size_t alignment = maxAlignment; - if (device) device->memoryMonitor(bytesAllocate+alignment,false); - ptr = alignedMalloc(bytesAllocate,alignment); - return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment); - } - } - else if (atype == EMBREE_OS_MALLOC) - { - if (device) device->memoryMonitor(bytesAllocate,false); - bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages); - return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages); - } - else - assert(false); - - return NULL; - } - - Block (AllocationType atype, size_t bytesAllocate, size_t bytesReserve, Block* next, size_t wasted, bool huge_pages = false) - : cur(0), allocEnd(bytesAllocate), reserveEnd(bytesReserve), next(next), wasted(wasted), atype(atype), huge_pages(huge_pages) - { - assert((((size_t)&data[0]) & (maxAlignment-1)) == 0); - } - - static Block* remove_shared_blocks(Block* head) - { - Block** prev_next = &head; - for (Block* block = head; block; block = block->next) { - if (block->atype == SHARED) *prev_next = block->next; - else prev_next = &block->next; - } - return head; - } - - void clear_list(MemoryMonitorInterface* device) - { - Block* block = this; - while (block) { - Block* next = block->next; - block->clear_block(device); - block = next; - } - } - - void clear_block (MemoryMonitorInterface* device) - { - const size_t sizeof_Header = offsetof(Block,data[0]); - const ssize_t sizeof_Alloced = wasted+sizeof_Header+getBlockAllocatedBytes(); - - if (atype == ALIGNED_MALLOC) { - alignedFree(this); - if (device) device->memoryMonitor(-sizeof_Alloced,true); - } - - else if (atype == EMBREE_OS_MALLOC) { - size_t sizeof_This = sizeof_Header+reserveEnd; - os_free(this,sizeof_This,huge_pages); - if (device) device->memoryMonitor(-sizeof_Alloced,true); - } - - else /* if (atype == SHARED) */ { - } - } - - void* malloc(MemoryMonitorInterface* device, size_t& bytes_in, size_t align, bool partial) - { - size_t bytes = bytes_in; - assert(align <= maxAlignment); - bytes = (bytes+(align-1)) & ~(align-1); - if (unlikely(cur+bytes > reserveEnd && !partial)) return nullptr; - const size_t i = cur.fetch_add(bytes); - if (unlikely(i+bytes > reserveEnd && !partial)) return nullptr; - if (unlikely(i > reserveEnd)) return nullptr; - bytes_in = bytes = min(bytes,reserveEnd-i); - - if (i+bytes > allocEnd) { - if (device) device->memoryMonitor(i+bytes-max(i,allocEnd),true); - } - return &data[i]; - } - - void* ptr() { - return &data[cur]; - } - - void reset_block () - { - allocEnd = max(allocEnd,(size_t)cur); - cur = 0; - } - - size_t getBlockUsedBytes() const { - return min(size_t(cur),reserveEnd); - } - - size_t getBlockFreeBytes() const { - return getBlockAllocatedBytes() - getBlockUsedBytes(); - } - - size_t getBlockAllocatedBytes() const { - return min(max(allocEnd,size_t(cur)),reserveEnd); - } - - size_t getBlockWastedBytes() const { - const size_t sizeof_Header = offsetof(Block,data[0]); - return sizeof_Header + wasted; - } - - size_t getBlockReservedBytes() const { - return reserveEnd; - } - - bool hasType(AllocationType atype_i, bool huge_pages_i) const - { - if (atype_i == ANY_TYPE ) return true; - else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages; - else return atype_i == atype; - } - - size_t getUsedBytes(AllocationType atype, bool huge_pages = false) const { - size_t bytes = 0; - for (const Block* block = this; block; block = block->next) { - if (!block->hasType(atype,huge_pages)) continue; - bytes += block->getBlockUsedBytes(); - } - return bytes; - } - - size_t getFreeBytes(AllocationType atype, bool huge_pages = false) const { - size_t bytes = 0; - for (const Block* block = this; block; block = block->next) { - if (!block->hasType(atype,huge_pages)) continue; - bytes += block->getBlockFreeBytes(); - } - return bytes; - } - - size_t getWastedBytes(AllocationType atype, bool huge_pages = false) const { - size_t bytes = 0; - for (const Block* block = this; block; block = block->next) { - if (!block->hasType(atype,huge_pages)) continue; - bytes += block->getBlockWastedBytes(); - } - return bytes; - } - - size_t getAllocatedBytes(AllocationType atype, bool huge_pages = false) const { - size_t bytes = 0; - for (const Block* block = this; block; block = block->next) { - if (!block->hasType(atype,huge_pages)) continue; - bytes += block->getBlockAllocatedBytes(); - } - return bytes; - } - - void print_list () - { - for (const Block* block = this; block; block = block->next) - block->print_block(); - } - - void print_block() const - { - if (atype == ALIGNED_MALLOC) std::cout << "A"; - else if (atype == EMBREE_OS_MALLOC) std::cout << "O"; - else if (atype == SHARED) std::cout << "S"; - if (huge_pages) std::cout << "H"; - size_t bytesUsed = getBlockUsedBytes(); - size_t bytesFree = getBlockFreeBytes(); - size_t bytesWasted = getBlockWastedBytes(); - std::cout << "[" << bytesUsed << ", " << bytesFree << ", " << bytesWasted << "] "; - } - - public: - std::atomic<size_t> cur; //!< current location of the allocator - std::atomic<size_t> allocEnd; //!< end of the allocated memory region - std::atomic<size_t> reserveEnd; //!< end of the reserved memory region - Block* next; //!< pointer to next block in list - size_t wasted; //!< amount of memory wasted through block alignment - AllocationType atype; //!< allocation mode of the block - bool huge_pages; //!< whether the block uses huge pages - char align[maxAlignment-5*sizeof(size_t)-sizeof(AllocationType)-sizeof(bool)]; //!< align data to maxAlignment - char data[1]; //!< here starts memory to use for allocations - }; - - private: - Device* device; - SpinLock mutex; - size_t slotMask; - std::atomic<Block*> threadUsedBlocks[MAX_THREAD_USED_BLOCK_SLOTS]; - std::atomic<Block*> usedBlocks; - std::atomic<Block*> freeBlocks; - - std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS]; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; -#else - SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; -#endif - - bool use_single_mode; - size_t defaultBlockSize; - size_t estimatedSize; - size_t growSize; - size_t maxGrowSize; - std::atomic<size_t> log2_grow_size_scale; //!< log2 of scaling factor for grow size // FIXME: remove - std::atomic<size_t> bytesUsed; - std::atomic<size_t> bytesFree; - std::atomic<size_t> bytesWasted; - static __thread ThreadLocal2* thread_local_allocator2; - static SpinLock s_thread_local_allocators_lock; - static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex thread_local_allocators_lock; -#else - SpinLock thread_local_allocators_lock; -#endif - std::vector<ThreadLocal2*> thread_local_allocators; - AllocationType atype; - mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/buffer.h b/thirdparty/embree-aarch64/kernels/common/buffer.h deleted file mode 100644 index 02d319c59d..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/buffer.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "device.h" - -namespace embree -{ - /*! Implements an API data buffer object. This class may or may not own the data. */ - class Buffer : public RefCount - { - public: - /*! Buffer construction */ - Buffer() - : device(nullptr), ptr(nullptr), numBytes(0), shared(false) {} - - /*! Buffer construction */ - Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr) - : device(device), numBytes(numBytes_in) - { - device->refInc(); - - if (ptr_in) - { - shared = true; - ptr = (char*)ptr_in; - } - else - { - shared = false; - alloc(); - } - } - - /*! Buffer destruction */ - ~Buffer() { - free(); - device->refDec(); - } - - /*! this class is not copyable */ - private: - Buffer(const Buffer& other) DELETED; // do not implement - Buffer& operator =(const Buffer& other) DELETED; // do not implement - - public: - /* inits and allocates the buffer */ - void create(Device* device_in, size_t numBytes_in) - { - init(device_in, numBytes_in); - alloc(); - } - - /* inits the buffer */ - void init(Device* device_in, size_t numBytes_in) - { - free(); - device = device_in; - ptr = nullptr; - numBytes = numBytes_in; - shared = false; - } - - /*! sets shared buffer */ - void set(Device* device_in, void* ptr_in, size_t numBytes_in) - { - free(); - device = device_in; - ptr = (char*)ptr_in; - if (numBytes_in != (size_t)-1) - numBytes = numBytes_in; - shared = true; - } - - /*! allocated buffer */ - void alloc() - { - if (device) - device->memoryMonitor(this->bytes(), false); - size_t b = (this->bytes()+15) & ssize_t(-16); - ptr = (char*)alignedMalloc(b,16); - } - - /*! frees the buffer */ - void free() - { - if (shared) return; - alignedFree(ptr); - if (device) - device->memoryMonitor(-ssize_t(this->bytes()), true); - ptr = nullptr; - } - - /*! gets buffer pointer */ - void* data() - { - /* report error if buffer is not existing */ - if (!device) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified"); - - /* return buffer */ - return ptr; - } - - /*! returns pointer to first element */ - __forceinline char* getPtr() const { - return ptr; - } - - /*! returns the number of bytes of the buffer */ - __forceinline size_t bytes() const { - return numBytes; - } - - /*! returns true of the buffer is not empty */ - __forceinline operator bool() const { - return ptr; - } - - public: - Device* device; //!< device to report memory usage to - char* ptr; //!< pointer to buffer data - size_t numBytes; //!< number of bytes in the buffer - bool shared; //!< set if memory is shared with application - }; - - /*! An untyped contiguous range of a buffer. This class does not own the buffer content. */ - class RawBufferView - { - public: - /*! Buffer construction */ - RawBufferView() - : ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {} - - public: - /*! sets the buffer view */ - void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in) - { - if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes)) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds"); - - ptr_ofs = buffer_in->ptr + offset_in; - stride = stride_in; - num = num_in; - format = format_in; - modCounter++; - modified = true; - buffer = buffer_in; - } - - /*! returns pointer to the first element */ - __forceinline char* getPtr() const { - return ptr_ofs; - } - - /*! returns pointer to the i'th element */ - __forceinline char* getPtr(size_t i) const - { - assert(i<num); - return ptr_ofs + i*stride; - } - - /*! returns the number of elements of the buffer */ - __forceinline size_t size() const { - return num; - } - - /*! returns the number of bytes of the buffer */ - __forceinline size_t bytes() const { - return num*stride; - } - - /*! returns the buffer stride */ - __forceinline unsigned getStride() const - { - assert(stride <= unsigned(inf)); - return unsigned(stride); - } - - /*! return the buffer format */ - __forceinline RTCFormat getFormat() const { - return format; - } - - /*! mark buffer as modified or unmodified */ - __forceinline void setModified() { - modCounter++; - modified = true; - } - - /*! mark buffer as modified or unmodified */ - __forceinline bool isModified(unsigned int otherModCounter) const { - return modCounter > otherModCounter; - } - - /*! mark buffer as modified or unmodified */ - __forceinline bool isLocalModified() const { - return modified; - } - - /*! clear local modified flag */ - __forceinline void clearLocalModified() { - modified = false; - } - - /*! returns true of the buffer is not empty */ - __forceinline operator bool() const { - return ptr_ofs; - } - - /*! checks padding to 16 byte check, fails hard */ - __forceinline void checkPadding16() const - { - if (ptr_ofs && num) - volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable? - } - - public: - char* ptr_ofs; //!< base pointer plus offset - size_t stride; //!< stride of the buffer in bytes - size_t num; //!< number of elements in the buffer - RTCFormat format; //!< format of the buffer - unsigned int modCounter; //!< version ID of this buffer - bool modified; //!< local modified data - int userData; //!< special data - Ref<Buffer> buffer; //!< reference to the parent buffer - }; - - /*! A typed contiguous range of a buffer. This class does not own the buffer content. */ - template<typename T> - class BufferView : public RawBufferView - { - public: - typedef T value_type; - - /*! access to the ith element of the buffer */ - __forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); } - __forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); } - }; - - template<> - class BufferView<Vec3fa> : public RawBufferView - { - public: - typedef Vec3fa value_type; - - /*! access to the ith element of the buffer */ - __forceinline const Vec3fa operator [](size_t i) const - { - assert(i<num); - return Vec3fa(vfloat4::loadu((float*)(ptr_ofs + i*stride))); - } - - /*! writes the i'th element */ - __forceinline void store(size_t i, const Vec3fa& v) - { - assert(i<num); - vfloat4::storeu((float*)(ptr_ofs + i*stride), (vfloat4)v); - } - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/builder.h b/thirdparty/embree-aarch64/kernels/common/builder.h deleted file mode 100644 index d2a1cfe3ce..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/builder.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "accel.h" - -namespace embree -{ -#define MODE_HIGH_QUALITY (1<<8) - - /*! virtual interface for all hierarchy builders */ - class Builder : public RefCount { - public: - - static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024; - - /*! initiates the hierarchy builder */ - virtual void build() = 0; - - /*! notifies the builder about the deletion of some geometry */ - virtual void deleteGeometry(size_t geomID) {}; - - /*! clears internal builder state */ - virtual void clear() = 0; - }; - - /*! virtual interface for progress monitor class */ - struct BuildProgressMonitor { - virtual void operator() (size_t dn) const = 0; - }; - - /*! build the progress monitor interface from a closure */ - template<typename Closure> - struct ProgressMonitorClosure : BuildProgressMonitor - { - public: - ProgressMonitorClosure (const Closure& closure) : closure(closure) {} - void operator() (size_t dn) const { closure(dn); } - private: - const Closure closure; - }; - template<typename Closure> __forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure) { - return ProgressMonitorClosure<Closure>(closure); - } - - struct LineSegments; - struct TriangleMesh; - struct QuadMesh; - struct UserGeometry; - - class Scene; - - typedef void (*createLineSegmentsAccelTy)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder); - typedef void (*createTriangleMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder); - typedef void (*createQuadMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder); - typedef void (*createUserGeometryAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder); - -} diff --git a/thirdparty/embree-aarch64/kernels/common/context.h b/thirdparty/embree-aarch64/kernels/common/context.h deleted file mode 100644 index d0185a74f2..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/context.h +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "rtcore.h" -#include "point_query.h" - -namespace embree -{ - class Scene; - - struct IntersectContext - { - public: - __forceinline IntersectContext(Scene* scene, RTCIntersectContext* user_context) - : scene(scene), user(user_context) {} - - __forceinline bool hasContextFilter() const { - return user->filter != nullptr; - } - - __forceinline bool isCoherent() const { - return embree::isCoherent(user->flags); - } - - __forceinline bool isIncoherent() const { - return embree::isIncoherent(user->flags); - } - - public: - Scene* scene; - RTCIntersectContext* user; - }; - - template<int M, typename Geometry> - __forceinline Vec4vf<M> enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v) - { -#if RTC_MIN_WIDTH - const vfloat<M> d = length(Vec3vf<M>(v) - ray_org); - const vfloat<M> r = clamp(context->user->minWidthDistanceFactor*d, v.w, geom->maxRadiusScale*v.w); - return Vec4vf<M>(v.x,v.y,v.z,r); -#else - return v; -#endif - } - - template<typename Geometry> - __forceinline Vec3ff enlargeRadiusToMinWidth(const IntersectContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v) - { -#if RTC_MIN_WIDTH - const float d = length(Vec3fa(v) - ray_org); - const float r = clamp(context->user->minWidthDistanceFactor*d, v.w, geom->maxRadiusScale*v.w); - return Vec3ff(v.x,v.y,v.z,r); -#else - return v; -#endif - } - - enum PointQueryType - { - POINT_QUERY_TYPE_UNDEFINED = 0, - POINT_QUERY_TYPE_SPHERE = 1, - POINT_QUERY_TYPE_AABB = 2, - }; - - typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args); - - struct PointQueryContext - { - public: - __forceinline PointQueryContext(Scene* scene, - PointQuery* query_ws, - PointQueryType query_type, - PointQueryFunction func, - RTCPointQueryContext* userContext, - float similarityScale, - void* userPtr) - : scene(scene) - , query_ws(query_ws) - , query_type(query_type) - , func(func) - , userContext(userContext) - , similarityScale(similarityScale) - , userPtr(userPtr) - , primID(RTC_INVALID_GEOMETRY_ID) - , geomID(RTC_INVALID_GEOMETRY_ID) - , query_radius(query_ws->radius) - { - if (query_type == POINT_QUERY_TYPE_AABB) { - assert(similarityScale == 0.f); - updateAABB(); - } - if (userContext->instStackSize == 0) { - assert(similarityScale == 1.f); - } - } - - public: - __forceinline void updateAABB() - { - if (likely(query_ws->radius == (float)inf || userContext->instStackSize == 0)) { - query_radius = Vec3fa(query_ws->radius); - return; - } - - const AffineSpace3fa m = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]); - BBox3fa bbox(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius)); - bbox = xfmBounds(m, bbox); - query_radius = 0.5f * (bbox.upper - bbox.lower); - } - -public: - Scene* scene; - - PointQuery* query_ws; // the original world space point query - PointQueryType query_type; - PointQueryFunction func; - RTCPointQueryContext* userContext; - const float similarityScale; - - void* userPtr; - - unsigned int primID; - unsigned int geomID; - - Vec3fa query_radius; // used if the query is converted to an AABB internally - }; -} - diff --git a/thirdparty/embree-aarch64/kernels/common/default.h b/thirdparty/embree-aarch64/kernels/common/default.h deleted file mode 100644 index 709119163b..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/default.h +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../../common/sys/platform.h" -#include "../../common/sys/sysinfo.h" -#include "../../common/sys/thread.h" -#include "../../common/sys/alloc.h" -#include "../../common/sys/ref.h" -#include "../../common/sys/intrinsics.h" -#include "../../common/sys/atomic.h" -#include "../../common/sys/mutex.h" -#include "../../common/sys/vector.h" -#include "../../common/sys/array.h" -#include "../../common/sys/string.h" -#include "../../common/sys/regression.h" -#include "../../common/sys/vector.h" - -#include "../../common/math/math.h" -#include "../../common/math/transcendental.h" -#include "../../common/simd/simd.h" -#include "../../common/math/vec2.h" -#include "../../common/math/vec3.h" -#include "../../common/math/vec4.h" -#include "../../common/math/vec2fa.h" -#include "../../common/math/vec3fa.h" -#include "../../common/math/interval.h" -#include "../../common/math/bbox.h" -#include "../../common/math/obbox.h" -#include "../../common/math/lbbox.h" -#include "../../common/math/linearspace2.h" -#include "../../common/math/linearspace3.h" -#include "../../common/math/affinespace.h" -#include "../../common/math/range.h" -#include "../../common/lexers/tokenstream.h" - -#include "../../common/tasking/taskscheduler.h" - -#define COMMA , - -#include "../config.h" -#include "isa.h" -#include "stat.h" -#include "profile.h" -#include "rtcore.h" -#include "vector.h" -#include "state.h" -#include "instance_stack.h" - -#include <vector> -#include <map> -#include <algorithm> -#include <functional> -#include <utility> -#include <sstream> - -#if !defined(_DEBUG) && defined(BUILD_IOS) -#undef assert -#define assert(_EXPR) -#endif - -namespace embree -{ - //////////////////////////////////////////////////////////////////////////////// - /// Vec2 shortcuts - //////////////////////////////////////////////////////////////////////////////// - - template<int N> using Vec2vf = Vec2<vfloat<N>>; - template<int N> using Vec2vd = Vec2<vdouble<N>>; - template<int N> using Vec2vr = Vec2<vreal<N>>; - template<int N> using Vec2vi = Vec2<vint<N>>; - template<int N> using Vec2vl = Vec2<vllong<N>>; - template<int N> using Vec2vb = Vec2<vbool<N>>; - template<int N> using Vec2vbf = Vec2<vboolf<N>>; - template<int N> using Vec2vbd = Vec2<vboold<N>>; - - typedef Vec2<vfloat4> Vec2vf4; - typedef Vec2<vdouble4> Vec2vd4; - typedef Vec2<vreal4> Vec2vr4; - typedef Vec2<vint4> Vec2vi4; - typedef Vec2<vllong4> Vec2vl4; - typedef Vec2<vbool4> Vec2vb4; - typedef Vec2<vboolf4> Vec2vbf4; - typedef Vec2<vboold4> Vec2vbd4; - - typedef Vec2<vfloat8> Vec2vf8; - typedef Vec2<vdouble8> Vec2vd8; - typedef Vec2<vreal8> Vec2vr8; - typedef Vec2<vint8> Vec2vi8; - typedef Vec2<vllong8> Vec2vl8; - typedef Vec2<vbool8> Vec2vb8; - typedef Vec2<vboolf8> Vec2vbf8; - typedef Vec2<vboold8> Vec2vbd8; - - typedef Vec2<vfloat16> Vec2vf16; - typedef Vec2<vdouble16> Vec2vd16; - typedef Vec2<vreal16> Vec2vr16; - typedef Vec2<vint16> Vec2vi16; - typedef Vec2<vllong16> Vec2vl16; - typedef Vec2<vbool16> Vec2vb16; - typedef Vec2<vboolf16> Vec2vbf16; - typedef Vec2<vboold16> Vec2vbd16; - - typedef Vec2<vfloatx> Vec2vfx; - typedef Vec2<vdoublex> Vec2vdx; - typedef Vec2<vrealx> Vec2vrx; - typedef Vec2<vintx> Vec2vix; - typedef Vec2<vllongx> Vec2vlx; - typedef Vec2<vboolx> Vec2vbx; - typedef Vec2<vboolfx> Vec2vbfx; - typedef Vec2<vbooldx> Vec2vbdx; - - //////////////////////////////////////////////////////////////////////////////// - /// Vec3 shortcuts - //////////////////////////////////////////////////////////////////////////////// - - template<int N> using Vec3vf = Vec3<vfloat<N>>; - template<int N> using Vec3vd = Vec3<vdouble<N>>; - template<int N> using Vec3vr = Vec3<vreal<N>>; - template<int N> using Vec3vi = Vec3<vint<N>>; - template<int N> using Vec3vl = Vec3<vllong<N>>; - template<int N> using Vec3vb = Vec3<vbool<N>>; - template<int N> using Vec3vbf = Vec3<vboolf<N>>; - template<int N> using Vec3vbd = Vec3<vboold<N>>; - - typedef Vec3<vfloat4> Vec3vf4; - typedef Vec3<vdouble4> Vec3vd4; - typedef Vec3<vreal4> Vec3vr4; - typedef Vec3<vint4> Vec3vi4; - typedef Vec3<vllong4> Vec3vl4; - typedef Vec3<vbool4> Vec3vb4; - typedef Vec3<vboolf4> Vec3vbf4; - typedef Vec3<vboold4> Vec3vbd4; - - typedef Vec3<vfloat8> Vec3vf8; - typedef Vec3<vdouble8> Vec3vd8; - typedef Vec3<vreal8> Vec3vr8; - typedef Vec3<vint8> Vec3vi8; - typedef Vec3<vllong8> Vec3vl8; - typedef Vec3<vbool8> Vec3vb8; - typedef Vec3<vboolf8> Vec3vbf8; - typedef Vec3<vboold8> Vec3vbd8; - - typedef Vec3<vfloat16> Vec3vf16; - typedef Vec3<vdouble16> Vec3vd16; - typedef Vec3<vreal16> Vec3vr16; - typedef Vec3<vint16> Vec3vi16; - typedef Vec3<vllong16> Vec3vl16; - typedef Vec3<vbool16> Vec3vb16; - typedef Vec3<vboolf16> Vec3vbf16; - typedef Vec3<vboold16> Vec3vbd16; - - typedef Vec3<vfloatx> Vec3vfx; - typedef Vec3<vdoublex> Vec3vdx; - typedef Vec3<vrealx> Vec3vrx; - typedef Vec3<vintx> Vec3vix; - typedef Vec3<vllongx> Vec3vlx; - typedef Vec3<vboolx> Vec3vbx; - typedef Vec3<vboolfx> Vec3vbfx; - typedef Vec3<vbooldx> Vec3vbdx; - - //////////////////////////////////////////////////////////////////////////////// - /// Vec4 shortcuts - //////////////////////////////////////////////////////////////////////////////// - - template<int N> using Vec4vf = Vec4<vfloat<N>>; - template<int N> using Vec4vd = Vec4<vdouble<N>>; - template<int N> using Vec4vr = Vec4<vreal<N>>; - template<int N> using Vec4vi = Vec4<vint<N>>; - template<int N> using Vec4vl = Vec4<vllong<N>>; - template<int N> using Vec4vb = Vec4<vbool<N>>; - template<int N> using Vec4vbf = Vec4<vboolf<N>>; - template<int N> using Vec4vbd = Vec4<vboold<N>>; - - typedef Vec4<vfloat4> Vec4vf4; - typedef Vec4<vdouble4> Vec4vd4; - typedef Vec4<vreal4> Vec4vr4; - typedef Vec4<vint4> Vec4vi4; - typedef Vec4<vllong4> Vec4vl4; - typedef Vec4<vbool4> Vec4vb4; - typedef Vec4<vboolf4> Vec4vbf4; - typedef Vec4<vboold4> Vec4vbd4; - - typedef Vec4<vfloat8> Vec4vf8; - typedef Vec4<vdouble8> Vec4vd8; - typedef Vec4<vreal8> Vec4vr8; - typedef Vec4<vint8> Vec4vi8; - typedef Vec4<vllong8> Vec4vl8; - typedef Vec4<vbool8> Vec4vb8; - typedef Vec4<vboolf8> Vec4vbf8; - typedef Vec4<vboold8> Vec4vbd8; - - typedef Vec4<vfloat16> Vec4vf16; - typedef Vec4<vdouble16> Vec4vd16; - typedef Vec4<vreal16> Vec4vr16; - typedef Vec4<vint16> Vec4vi16; - typedef Vec4<vllong16> Vec4vl16; - typedef Vec4<vbool16> Vec4vb16; - typedef Vec4<vboolf16> Vec4vbf16; - typedef Vec4<vboold16> Vec4vbd16; - - typedef Vec4<vfloatx> Vec4vfx; - typedef Vec4<vdoublex> Vec4vdx; - typedef Vec4<vrealx> Vec4vrx; - typedef Vec4<vintx> Vec4vix; - typedef Vec4<vllongx> Vec4vlx; - typedef Vec4<vboolx> Vec4vbx; - typedef Vec4<vboolfx> Vec4vbfx; - typedef Vec4<vbooldx> Vec4vbdx; - - //////////////////////////////////////////////////////////////////////////////// - /// Other shortcuts - //////////////////////////////////////////////////////////////////////////////// - - template<int N> using BBox3vf = BBox<Vec3vf<N>>; - typedef BBox<Vec3vf4> BBox3vf4; - typedef BBox<Vec3vf8> BBox3vf8; - typedef BBox<Vec3vf16> BBox3vf16; - - /* calculate time segment itime and fractional time ftime */ - __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime) - { - const float timeScaled = time * numTimeSegments; - const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); - ftime = timeScaled - itimef; - return int(itimef); - } - - __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime) - { - const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; - const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); - ftime = timeScaled - itimef; - return int(itimef); - } - - template<int N> - __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) - { - const vfloat<N> timeScaled = time * numTimeSegments; - const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); - ftime = timeScaled - itimef; - return vint<N>(itimef); - } - - template<int N> - __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) - { - const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; - const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); - ftime = timeScaled - itimef; - return vint<N>(itimef); - } - - /* calculate overlapping time segment range */ - __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments) - { - const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step - const float round_down = 1.0f-2.0f*float(ulp); - const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f); - const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments); - return make_range(itime_lower, itime_upper); - } - - /* calculate overlapping time segment range */ - __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments) - { - const float lower = (range.lower-time_range.lower)/time_range.size(); - const float upper = (range.upper-time_range.lower)/time_range.size(); - return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments); - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/device.cpp b/thirdparty/embree-aarch64/kernels/common/device.cpp deleted file mode 100644 index 16ec11b892..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/device.cpp +++ /dev/null @@ -1,567 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "device.h" -#include "../hash.h" -#include "scene_triangle_mesh.h" -#include "scene_user_geometry.h" -#include "scene_instance.h" -#include "scene_curves.h" -#include "scene_subdiv_mesh.h" - -#include "../subdiv/tessellation_cache.h" - -#include "acceln.h" -#include "geometry.h" - -#include "../geometry/cylinder.h" - -#include "../bvh/bvh4_factory.h" -#include "../bvh/bvh8_factory.h" - -#include "../../common/tasking/taskscheduler.h" -#include "../../common/sys/alloc.h" - -namespace embree -{ - /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */ - ssize_t Device::debug_int0 = 0; - ssize_t Device::debug_int1 = 0; - ssize_t Device::debug_int2 = 0; - ssize_t Device::debug_int3 = 0; - - DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs); - - static MutexSys g_mutex; - static std::map<Device*,size_t> g_cache_size_map; - static std::map<Device*,size_t> g_num_threads_map; - - Device::Device (const char* cfg) - { - /* check that CPU supports lowest ISA */ - if (!hasISA(ISA)) { - throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR); - } - - /* set default frequency level for detected CPU */ - switch (getCPUModel()) { - case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break; - case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break; - case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break; - case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break; - case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break; - case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break; - case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break; - } - - /* initialize global state */ -#if defined(EMBREE_CONFIG) - State::parseString(EMBREE_CONFIG); -#endif - State::parseString(cfg); - if (!ignore_config_files && FileName::executableFolder() != FileName("")) - State::parseFile(FileName::executableFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR))); - if (!ignore_config_files && FileName::homeFolder() != FileName("")) - State::parseFile(FileName::homeFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR))); - State::verify(); - - /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */ - if (!checkISASupport()) { - throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA"); - } - - /*! do some internal tests */ - assert(isa::Cylinder::verify()); - - /*! enable huge page support if desired */ -#if defined(__WIN32__) - if (State::enable_selockmemoryprivilege) - State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3)); -#endif - State::hugepages_success &= os_init(State::hugepages,State::verbosity(3)); - - /*! set tessellation cache size */ - setCacheSize( State::tessellation_cache_size ); - - /*! enable some floating point exceptions to catch bugs */ - if (State::float_exceptions) - { - int exceptions = _MM_MASK_MASK; - //exceptions &= ~_MM_MASK_INVALID; - exceptions &= ~_MM_MASK_DENORM; - exceptions &= ~_MM_MASK_DIV_ZERO; - //exceptions &= ~_MM_MASK_OVERFLOW; - //exceptions &= ~_MM_MASK_UNDERFLOW; - //exceptions &= ~_MM_MASK_INEXACT; - _MM_SET_EXCEPTION_MASK(exceptions); - } - - /* print info header */ - if (State::verbosity(1)) - print(); - if (State::verbosity(2)) - State::print(); - - /* register all algorithms */ - bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features)); - -#if defined(EMBREE_TARGET_SIMD8) - bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features)); -#endif - - /* setup tasking system */ - initTaskingSystem(numThreads); - - /* ray stream SOA to AOS conversion */ -#if defined(EMBREE_RAY_PACKETS) - RayStreamFilterFuncsType rayStreamFilterFuncs; - SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(enabled_cpu_features,rayStreamFilterFuncs); - rayStreamFilters = rayStreamFilterFuncs(); -#endif - } - - Device::~Device () - { - setCacheSize(0); - exitTaskingSystem(); - } - - std::string getEnabledTargets() - { - std::string v; -#if defined(EMBREE_TARGET_SSE2) - v += "SSE2 "; -#endif -#if defined(EMBREE_TARGET_SSE42) - v += "SSE4.2 "; -#endif -#if defined(EMBREE_TARGET_AVX) - v += "AVX "; -#endif -#if defined(EMBREE_TARGET_AVX2) - v += "AVX2 "; -#endif -#if defined(EMBREE_TARGET_AVX512KNL) - v += "AVX512KNL "; -#endif -#if defined(EMBREE_TARGET_AVX512SKX) - v += "AVX512SKX "; -#endif - return v; - } - - std::string getEmbreeFeatures() - { - std::string v; -#if defined(EMBREE_RAY_MASK) - v += "raymasks "; -#endif -#if defined (EMBREE_BACKFACE_CULLING) - v += "backfaceculling "; -#endif -#if defined (EMBREE_BACKFACE_CULLING_CURVES) - v += "backfacecullingcurves "; -#endif -#if defined(EMBREE_FILTER_FUNCTION) - v += "intersection_filter "; -#endif -#if defined (EMBREE_COMPACT_POLYS) - v += "compact_polys "; -#endif - return v; - } - - void Device::print() - { - const int cpu_features = getCPUFeatures(); - std::cout << std::endl; - std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl; - std::cout << " Compiler : " << getCompilerName() << std::endl; - std::cout << " Build : "; -#if defined(DEBUG) - std::cout << "Debug " << std::endl; -#else - std::cout << "Release " << std::endl; -#endif - std::cout << " Platform : " << getPlatformName() << std::endl; - std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl; - std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl; - std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl; - std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl; - const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON; - const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON; - std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl; - std::cout << " Config" << std::endl; - std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl; - std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl; - std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl; - std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl; - std::cout << " Features: " << getEmbreeFeatures() << std::endl; - std::cout << " Tasking : "; -#if defined(TASKING_TBB) - std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " "; - #if TBB_INTERFACE_VERSION >= 12002 - std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " "; - #else - std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " "; - #endif -#endif -#if defined(TASKING_INTERNAL) - std::cout << "internal_tasking_system "; -#endif -#if defined(TASKING_GCD) && defined(BUILD_IOS) - std::cout << "GCD tasking system "; -#endif -#if defined(TASKING_PPL) - std::cout << "PPL "; -#endif - std::cout << std::endl; - - /* check of FTZ and DAZ flags are set in CSR */ - if (!hasFTZ || !hasDAZ) - { -#if !defined(_DEBUG) - if (State::verbosity(1)) -#endif - { - std::cout << std::endl; - std::cout << "================================================================================" << std::endl; - std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl - << " in the MXCSR control and status register. This can have a severe " << std::endl - << " performance impact. Please enable these modes for each application " << std::endl - << " thread the following way:" << std::endl - << std::endl - << " #include \"xmmintrin.h\"" << std::endl - << " #include \"pmmintrin.h\"" << std::endl - << std::endl - << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl - << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl; - std::cout << "================================================================================" << std::endl; - std::cout << std::endl; - } - } - std::cout << std::endl; - } - - void Device::setDeviceErrorCode(RTCError error) - { - RTCError* stored_error = errorHandler.error(); - if (*stored_error == RTC_ERROR_NONE) - *stored_error = error; - } - - RTCError Device::getDeviceErrorCode() - { - RTCError* stored_error = errorHandler.error(); - RTCError error = *stored_error; - *stored_error = RTC_ERROR_NONE; - return error; - } - - void Device::setThreadErrorCode(RTCError error) - { - RTCError* stored_error = g_errorHandler.error(); - if (*stored_error == RTC_ERROR_NONE) - *stored_error = error; - } - - RTCError Device::getThreadErrorCode() - { - RTCError* stored_error = g_errorHandler.error(); - RTCError error = *stored_error; - *stored_error = RTC_ERROR_NONE; - return error; - } - - void Device::process_error(Device* device, RTCError error, const char* str) - { - /* store global error code when device construction failed */ - if (!device) - return setThreadErrorCode(error); - - /* print error when in verbose mode */ - if (device->verbosity(1)) - { - switch (error) { - case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break; - case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break; - case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break; - case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break; - case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break; - case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break; - default : std::cerr << "Embree: Invalid error code"; break; - }; - if (str) std::cerr << ", (" << str << ")"; - std::cerr << std::endl; - } - - /* call user specified error callback */ - if (device->error_function) - device->error_function(device->error_function_userptr,error,str); - - /* record error code */ - device->setDeviceErrorCode(error); - } - - void Device::memoryMonitor(ssize_t bytes, bool post) - { - if (State::memory_monitor_function && bytes != 0) { - if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) { - if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor - throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination"); - } - } - } - } - - size_t getMaxNumThreads() - { - size_t maxNumThreads = 0; - for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++) - maxNumThreads = max(maxNumThreads, (*i).second); - if (maxNumThreads == 0) - maxNumThreads = std::numeric_limits<size_t>::max(); - return maxNumThreads; - } - - size_t getMaxCacheSize() - { - size_t maxCacheSize = 0; - for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++) - maxCacheSize = max(maxCacheSize, (*i).second); - return maxCacheSize; - } - - void Device::setCacheSize(size_t bytes) - { -#if defined(EMBREE_GEOMETRY_SUBDIVISION) - Lock<MutexSys> lock(g_mutex); - if (bytes == 0) g_cache_size_map.erase(this); - else g_cache_size_map[this] = bytes; - - size_t maxCacheSize = getMaxCacheSize(); - resizeTessellationCache(maxCacheSize); -#endif - } - - void Device::initTaskingSystem(size_t numThreads) - { - Lock<MutexSys> lock(g_mutex); - if (numThreads == 0) - g_num_threads_map[this] = std::numeric_limits<size_t>::max(); - else - g_num_threads_map[this] = numThreads; - - /* create task scheduler */ - size_t maxNumThreads = getMaxNumThreads(); - TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads); -#if USE_TASK_ARENA - const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount()); - const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads); - arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads)); -#endif - } - - void Device::exitTaskingSystem() - { - Lock<MutexSys> lock(g_mutex); - g_num_threads_map.erase(this); - - /* terminate tasking system */ - if (g_num_threads_map.size() == 0) { - TaskScheduler::destroy(); - } - /* or configure new number of threads */ - else { - size_t maxNumThreads = getMaxNumThreads(); - TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads); - } -#if USE_TASK_ARENA - arena.reset(); -#endif - } - - void Device::setProperty(const RTCDeviceProperty prop, ssize_t val) - { - /* hidden internal properties */ - switch ((size_t)prop) - { - case 1000000: debug_int0 = val; return; - case 1000001: debug_int1 = val; return; - case 1000002: debug_int2 = val; return; - case 1000003: debug_int3 = val; return; - } - - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property"); - } - - ssize_t Device::getProperty(const RTCDeviceProperty prop) - { - size_t iprop = (size_t)prop; - - /* get name of internal regression test */ - if (iprop >= 2000000 && iprop < 3000000) - { - RegressionTest* test = getRegressionTest(iprop-2000000); - if (test) return (ssize_t) test->name.c_str(); - else return 0; - } - - /* run internal regression test */ - if (iprop >= 3000000 && iprop < 4000000) - { - RegressionTest* test = getRegressionTest(iprop-3000000); - if (test) return test->run(); - else return 0; - } - - /* documented properties */ - switch (prop) - { - case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR; - case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR; - case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH; - case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION; - -#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2); -#else - case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX); -#else - case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512KNL) | hasISA(AVX512SKX); -#else - case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_RAY_MASK) - case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_BACKFACE_CULLING) - case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1; -#else - case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0; -#endif - -#if defined(EMBREE_BACKFACE_CULLING_CURVES) - case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1; -#else - case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0; -#endif - -#if defined(EMBREE_COMPACT_POLYS) - case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1; -#else - case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0; -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_IGNORE_INVALID_RAYS) - case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1; -#else - case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0; -#endif - -#if defined(TASKING_INTERNAL) - case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0; -#endif - -#if defined(TASKING_TBB) - case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1; -#endif - -#if defined(TASKING_PPL) - case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2; -#endif - -#if defined(TASKING_GCD) && defined(BUILD_IOS) - case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 3; -#endif - -#if defined(EMBREE_GEOMETRY_TRIANGLE) - case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_GEOMETRY_QUAD) - case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_GEOMETRY_CURVE) - case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_GEOMETRY_SUBDIVISION) - case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_GEOMETRY_USER) - case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(EMBREE_GEOMETRY_POINT) - case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0; -#endif - -#if defined(TASKING_PPL) - case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0; -#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8) - case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0; -#else - case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1; -#endif - -#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION - case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1; -#else - case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0; -#endif - - default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/device.h b/thirdparty/embree-aarch64/kernels/common/device.h deleted file mode 100644 index e9a81bb109..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/device.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "state.h" -#include "accel.h" - -namespace embree -{ - class BVH4Factory; - class BVH8Factory; - - class Device : public State, public MemoryMonitorInterface - { - ALIGNED_CLASS_(16); - - public: - - /*! Device construction */ - Device (const char* cfg); - - /*! Device destruction */ - virtual ~Device (); - - /*! prints info about the device */ - void print(); - - /*! sets the error code */ - void setDeviceErrorCode(RTCError error); - - /*! returns and clears the error code */ - RTCError getDeviceErrorCode(); - - /*! sets the error code */ - static void setThreadErrorCode(RTCError error); - - /*! returns and clears the error code */ - static RTCError getThreadErrorCode(); - - /*! processes error codes, do not call directly */ - static void process_error(Device* device, RTCError error, const char* str); - - /*! invokes the memory monitor callback */ - void memoryMonitor(ssize_t bytes, bool post); - - /*! sets the size of the software cache. */ - void setCacheSize(size_t bytes); - - /*! sets a property */ - void setProperty(const RTCDeviceProperty prop, ssize_t val); - - /*! gets a property */ - ssize_t getProperty(const RTCDeviceProperty prop); - - private: - - /*! initializes the tasking system */ - void initTaskingSystem(size_t numThreads); - - /*! shuts down the tasking system */ - void exitTaskingSystem(); - - /*! some variables that can be set via rtcSetParameter1i for debugging purposes */ - public: - static ssize_t debug_int0; - static ssize_t debug_int1; - static ssize_t debug_int2; - static ssize_t debug_int3; - - public: - std::unique_ptr<BVH4Factory> bvh4_factory; -#if defined(EMBREE_TARGET_SIMD8) - std::unique_ptr<BVH8Factory> bvh8_factory; -#endif - -#if USE_TASK_ARENA - std::unique_ptr<tbb::task_arena> arena; -#endif - - /* ray streams filter */ - RayStreamFilterFuncs rayStreamFilters; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.cpp b/thirdparty/embree-aarch64/kernels/common/geometry.cpp deleted file mode 100644 index b3aa8e3396..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/geometry.cpp +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "geometry.h" -#include "scene.h" - -namespace embree -{ - const char* Geometry::gtype_names[Geometry::GTY_END] = - { - "flat_linear_curve", - "round_linear_curve", - "oriented_linear_curve", - "", - "flat_bezier_curve", - "round_bezier_curve", - "oriented_bezier_curve", - "", - "flat_bspline_curve", - "round_bspline_curve", - "oriented_bspline_curve", - "", - "flat_hermite_curve", - "round_hermite_curve", - "oriented_hermite_curve", - "", - "flat_catmull_rom_curve", - "round_catmull_rom_curve", - "oriented_catmull_rom_curve", - "", - "triangles", - "quads", - "grid", - "subdivs", - "", - "sphere", - "disc", - "oriented_disc", - "", - "usergeom", - "instance_cheap", - "instance_expensive", - }; - - Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps) - : device(device), userPtr(nullptr), - numPrimitives(numPrimitives), numTimeSteps(unsigned(numTimeSteps)), fnumTimeSegments(float(numTimeSteps-1)), time_range(0.0f,1.0f), - mask(-1), - gtype(gtype), - gsubtype(GTY_SUBTYPE_DEFAULT), - quality(RTC_BUILD_QUALITY_MEDIUM), - state((unsigned)State::MODIFIED), - enabled(true), - intersectionFilterN(nullptr), occlusionFilterN(nullptr), pointQueryFunc(nullptr) - { - device->refInc(); - } - - Geometry::~Geometry() - { - device->refDec(); - } - - void Geometry::setNumPrimitives(unsigned int numPrimitives_in) - { - if (numPrimitives_in == numPrimitives) return; - - numPrimitives = numPrimitives_in; - - Geometry::update(); - } - - void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in) - { - if (numTimeSteps_in == numTimeSteps) { - return; - } - - numTimeSteps = numTimeSteps_in; - fnumTimeSegments = float(numTimeSteps_in-1); - - Geometry::update(); - } - - void Geometry::setTimeRange (const BBox1f range) - { - time_range = range; - Geometry::update(); - } - - void Geometry::update() - { - ++modCounter_; // FIXME: required? - state = (unsigned)State::MODIFIED; - } - - void Geometry::commit() - { - ++modCounter_; - state = (unsigned)State::COMMITTED; - } - - void Geometry::preCommit() - { - if (State::MODIFIED == (State)state) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed"); - } - - void Geometry::postCommit() - { - } - - void Geometry::enable () - { - if (isEnabled()) - return; - - enabled = true; - ++modCounter_; - } - - void Geometry::disable () - { - if (isDisabled()) - return; - - enabled = false; - ++modCounter_; - } - - void Geometry::setUserData (void* ptr) - { - userPtr = ptr; - } - - void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter) - { - if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH))) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry"); - - intersectionFilterN = filter; - } - - void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter) - { - if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH))) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry"); - - occlusionFilterN = filter; - } - - void Geometry::setPointQueryFunction (RTCPointQueryFunction func) - { - pointQueryFunc = func; - } - - void Geometry::interpolateN(const RTCInterpolateNArguments* const args) - { - const void* valid_i = args->valid; - const unsigned* primIDs = args->primIDs; - const float* u = args->u; - const float* v = args->v; - unsigned int N = args->N; - RTCBufferType bufferType = args->bufferType; - unsigned int bufferSlot = args->bufferSlot; - float* P = args->P; - float* dPdu = args->dPdu; - float* dPdv = args->dPdv; - float* ddPdudu = args->ddPdudu; - float* ddPdvdv = args->ddPdvdv; - float* ddPdudv = args->ddPdudv; - unsigned int valueCount = args->valueCount; - - if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex"); - const int* valid = (const int*) valid_i; - - __aligned(64) float P_tmp[256]; - __aligned(64) float dPdu_tmp[256]; - __aligned(64) float dPdv_tmp[256]; - __aligned(64) float ddPdudu_tmp[256]; - __aligned(64) float ddPdvdv_tmp[256]; - __aligned(64) float ddPdudv_tmp[256]; - - float* Pt = P ? P_tmp : nullptr; - float* dPdut = nullptr, *dPdvt = nullptr; - if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; } - float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr; - if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; } - - for (unsigned int i=0; i<N; i++) - { - if (valid && !valid[i]) continue; - - RTCInterpolateArguments iargs; - iargs.primID = primIDs[i]; - iargs.u = u[i]; - iargs.v = v[i]; - iargs.bufferType = bufferType; - iargs.bufferSlot = bufferSlot; - iargs.P = Pt; - iargs.dPdu = dPdut; - iargs.dPdv = dPdvt; - iargs.ddPdudu = ddPdudut; - iargs.ddPdvdv = ddPdvdvt; - iargs.ddPdudv = ddPdudvt; - iargs.valueCount = valueCount; - interpolate(&iargs); - - if (likely(P)) { - for (unsigned int j=0; j<valueCount; j++) - P[j*N+i] = Pt[j]; - } - if (likely(dPdu)) - { - for (unsigned int j=0; j<valueCount; j++) { - dPdu[j*N+i] = dPdut[j]; - dPdv[j*N+i] = dPdvt[j]; - } - } - if (likely(ddPdudu)) - { - for (unsigned int j=0; j<valueCount; j++) { - ddPdudu[j*N+i] = ddPdudut[j]; - ddPdvdv[j*N+i] = ddPdvdvt[j]; - ddPdudv[j*N+i] = ddPdudvt[j]; - } - } - } - } - - bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context) - { - assert(context->primID < size()); - - RTCPointQueryFunctionArguments args; - args.query = (RTCPointQuery*)context->query_ws; - args.userPtr = context->userPtr; - args.primID = context->primID; - args.geomID = context->geomID; - args.context = context->userContext; - args.similarityScale = context->similarityScale; - - bool update = false; - if(context->func) update |= context->func(&args); - if(pointQueryFunc) update |= pointQueryFunc(&args); - - if (update && context->userContext->instStackSize > 0) - { - // update point query - if (context->query_type == POINT_QUERY_TYPE_AABB) { - context->updateAABB(); - } else { - assert(context->similarityScale > 0.f); - query->radius = context->query_ws->radius * context->similarityScale; - } - } - return update; - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.h b/thirdparty/embree-aarch64/kernels/common/geometry.h deleted file mode 100644 index 953974bfd2..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/geometry.h +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "device.h" -#include "buffer.h" -#include "../common/point_query.h" -#include "../builders/priminfo.h" - -namespace embree -{ - class Scene; - class Geometry; - - struct GeometryCounts - { - __forceinline GeometryCounts() - : numFilterFunctions(0), - numTriangles(0), numMBTriangles(0), - numQuads(0), numMBQuads(0), - numBezierCurves(0), numMBBezierCurves(0), - numLineSegments(0), numMBLineSegments(0), - numSubdivPatches(0), numMBSubdivPatches(0), - numUserGeometries(0), numMBUserGeometries(0), - numInstancesCheap(0), numMBInstancesCheap(0), - numInstancesExpensive(0), numMBInstancesExpensive(0), - numGrids(0), numMBGrids(0), - numPoints(0), numMBPoints(0) {} - - __forceinline size_t size() const { - return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numGrids + numPoints - + numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBGrids + numMBPoints; - } - - __forceinline unsigned int enabledGeometryTypesMask() const - { - unsigned int mask = 0; - if (numTriangles) mask |= 1 << 0; - if (numQuads) mask |= 1 << 1; - if (numBezierCurves+numLineSegments) mask |= 1 << 2; - if (numSubdivPatches) mask |= 1 << 3; - if (numUserGeometries) mask |= 1 << 4; - if (numInstancesCheap) mask |= 1 << 5; - if (numInstancesExpensive) mask |= 1 << 6; - if (numGrids) mask |= 1 << 7; - if (numPoints) mask |= 1 << 8; - - unsigned int maskMB = 0; - if (numMBTriangles) maskMB |= 1 << 0; - if (numMBQuads) maskMB |= 1 << 1; - if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2; - if (numMBSubdivPatches) maskMB |= 1 << 3; - if (numMBUserGeometries) maskMB |= 1 << 4; - if (numMBInstancesCheap) maskMB |= 1 << 5; - if (numMBInstancesExpensive) maskMB |= 1 << 6; - if (numMBGrids) maskMB |= 1 << 7; - if (numMBPoints) maskMB |= 1 << 8; - - return (mask<<8) + maskMB; - } - - __forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const - { - GeometryCounts ret; - ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions; - ret.numTriangles = numTriangles + rhs.numTriangles; - ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles; - ret.numQuads = numQuads + rhs.numQuads; - ret.numMBQuads = numMBQuads + rhs.numMBQuads; - ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves; - ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves; - ret.numLineSegments = numLineSegments + rhs.numLineSegments; - ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments; - ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches; - ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches; - ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries; - ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries; - ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap; - ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap; - ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive; - ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive; - ret.numGrids = numGrids + rhs.numGrids; - ret.numMBGrids = numMBGrids + rhs.numMBGrids; - ret.numPoints = numPoints + rhs.numPoints; - ret.numMBPoints = numMBPoints + rhs.numMBPoints; - - return ret; - } - - size_t numFilterFunctions; //!< number of geometries with filter functions enabled - size_t numTriangles; //!< number of enabled triangles - size_t numMBTriangles; //!< number of enabled motion blured triangles - size_t numQuads; //!< number of enabled quads - size_t numMBQuads; //!< number of enabled motion blurred quads - size_t numBezierCurves; //!< number of enabled curves - size_t numMBBezierCurves; //!< number of enabled motion blurred curves - size_t numLineSegments; //!< number of enabled line segments - size_t numMBLineSegments; //!< number of enabled line motion blurred segments - size_t numSubdivPatches; //!< number of enabled subdivision patches - size_t numMBSubdivPatches; //!< number of enabled motion blured subdivision patches - size_t numUserGeometries; //!< number of enabled user geometries - size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries - size_t numInstancesCheap; //!< number of enabled cheap instances - size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances - size_t numInstancesExpensive; //!< number of enabled expensive instances - size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances - size_t numGrids; //!< number of enabled grid geometries - size_t numMBGrids; //!< number of enabled motion blurred grid geometries - size_t numPoints; //!< number of enabled points - size_t numMBPoints; //!< number of enabled motion blurred points - }; - - /*! Base class all geometries are derived from */ - class Geometry : public RefCount - { - friend class Scene; - public: - - /*! type of geometry */ - enum GType - { - GTY_FLAT_LINEAR_CURVE = 0, - GTY_ROUND_LINEAR_CURVE = 1, - GTY_ORIENTED_LINEAR_CURVE = 2, - GTY_CONE_LINEAR_CURVE = 3, - - GTY_FLAT_BEZIER_CURVE = 4, - GTY_ROUND_BEZIER_CURVE = 5, - GTY_ORIENTED_BEZIER_CURVE = 6, - - GTY_FLAT_BSPLINE_CURVE = 8, - GTY_ROUND_BSPLINE_CURVE = 9, - GTY_ORIENTED_BSPLINE_CURVE = 10, - - GTY_FLAT_HERMITE_CURVE = 12, - GTY_ROUND_HERMITE_CURVE = 13, - GTY_ORIENTED_HERMITE_CURVE = 14, - - GTY_FLAT_CATMULL_ROM_CURVE = 16, - GTY_ROUND_CATMULL_ROM_CURVE = 17, - GTY_ORIENTED_CATMULL_ROM_CURVE = 18, - - GTY_TRIANGLE_MESH = 20, - GTY_QUAD_MESH = 21, - GTY_GRID_MESH = 22, - GTY_SUBDIV_MESH = 23, - - GTY_SPHERE_POINT = 25, - GTY_DISC_POINT = 26, - GTY_ORIENTED_DISC_POINT = 27, - - GTY_USER_GEOMETRY = 29, - GTY_INSTANCE_CHEAP = 30, - GTY_INSTANCE_EXPENSIVE = 31, - GTY_END = 32, - - GTY_BASIS_LINEAR = 0, - GTY_BASIS_BEZIER = 4, - GTY_BASIS_BSPLINE = 8, - GTY_BASIS_HERMITE = 12, - GTY_BASIS_CATMULL_ROM = 16, - GTY_BASIS_MASK = 28, - - GTY_SUBTYPE_FLAT_CURVE = 0, - GTY_SUBTYPE_ROUND_CURVE = 1, - GTY_SUBTYPE_ORIENTED_CURVE = 2, - GTY_SUBTYPE_MASK = 3, - }; - - enum GSubType - { - GTY_SUBTYPE_DEFAULT= 0, - GTY_SUBTYPE_INSTANCE_LINEAR = 0, - GTY_SUBTYPE_INSTANCE_QUATERNION = 1 - }; - - enum GTypeMask - { - MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE, - MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE, - MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE, - MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE, - - MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE, - MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE, - MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE, - - MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE, - MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE, - MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE, - - MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE, - MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE, - MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE, - - MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE, - MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE, - MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE, - - MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE, - - MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE | - MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE | - MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE | - MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE, - - MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT, - MTY_DISC_POINT = 1ul << GTY_DISC_POINT, - MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT, - - MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT, - - MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS, - - MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH, - MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH, - MTY_GRID_MESH = 1ul << GTY_GRID_MESH, - MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH, - MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY, - - MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP, - MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE, - MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE - }; - - static const char* gtype_names[GTY_END]; - - enum class State : unsigned { - MODIFIED = 0, - COMMITTED = 1, - }; - - public: - - /*! Geometry constructor */ - Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps); - - /*! Geometry destructor */ - virtual ~Geometry(); - - public: - - /*! tests if geometry is enabled */ - __forceinline bool isEnabled() const { return enabled; } - - /*! tests if geometry is disabled */ - __forceinline bool isDisabled() const { return !isEnabled(); } - - /*! tests if that geometry has some filter function set */ - __forceinline bool hasFilterFunctions () const { - return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr); - } - - /*! returns geometry type */ - __forceinline GType getType() const { return gtype; } - - /*! returns curve type */ - __forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); } - - /*! returns curve basis */ - __forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); } - - /*! returns geometry type mask */ - __forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); } - - /*! returns number of primitives */ - __forceinline size_t size() const { return numPrimitives; } - - /*! sets the number of primitives */ - virtual void setNumPrimitives(unsigned int numPrimitives_in); - - /*! sets number of time steps */ - virtual void setNumTimeSteps (unsigned int numTimeSteps_in); - - /*! sets motion blur time range */ - void setTimeRange (const BBox1f range); - - /*! sets number of vertex attributes */ - virtual void setVertexAttributeCount (unsigned int N) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! sets number of topologies */ - virtual void setTopologyCount (unsigned int N) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! sets the build quality */ - void setBuildQuality(RTCBuildQuality quality_in) - { - this->quality = quality_in; - Geometry::update(); - } - - /* calculate time segment itime and fractional time ftime */ - __forceinline int timeSegment(float time, float& ftime) const { - return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime); - } - - template<int N> - __forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const { - return getTimeSegment(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime); - } - - /* calculate overlapping time segment range */ - __forceinline range<int> timeSegmentRange(const BBox1f& range) const { - return getTimeSegmentRange(range,time_range,fnumTimeSegments); - } - - /* returns time that corresponds to time step */ - __forceinline float timeStep(const int i) const { - assert(i>=0 && i<(int)numTimeSteps); - return time_range.lower + time_range.size()*float(i)/fnumTimeSegments; - } - - /*! for all geometries */ - public: - - /*! Enable geometry. */ - virtual void enable(); - - /*! Update geometry. */ - void update(); - - /*! commit of geometry */ - virtual void commit(); - - /*! Update geometry buffer. */ - virtual void updateBuffer(RTCBufferType type, unsigned int slot) { - update(); // update everything for geometries not supporting this call - } - - /*! Disable geometry. */ - virtual void disable(); - - /*! Verify the geometry */ - virtual bool verify() { return true; } - - /*! called before every build */ - virtual void preCommit(); - - /*! called after every build */ - virtual void postCommit(); - - virtual void addElementsToCount (GeometryCounts & counts) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - }; - - /*! sets constant tessellation rate for the geometry */ - virtual void setTessellationRate(float N) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Sets the maximal curve radius scale allowed by min-width feature. */ - virtual void setMaxRadiusScale(float s) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set user data pointer. */ - virtual void setUserData(void* ptr); - - /*! Get user data pointer. */ - __forceinline void* getUserData() const { - return userPtr; - } - - /*! interpolates user data to the specified u/v location */ - virtual void interpolate(const RTCInterpolateArguments* const args) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! interpolates user data to the specified u/v locations */ - virtual void interpolateN(const RTCInterpolateNArguments* const args); - - /* point query api */ - bool pointQuery(PointQuery* query, PointQueryContext* context); - - /*! for subdivision surfaces only */ - public: - virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set displacement function. */ - virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual unsigned int getFirstHalfEdge(unsigned int faceID) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual unsigned int getFace(unsigned int edgeID) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual unsigned int getNextHalfEdge(unsigned int edgeID) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! get fast access to first vertex buffer if applicable */ - virtual float * getCompactVertexArray () const { - return nullptr; - } - - /*! Returns the modified counter - how many times the geo has been modified */ - __forceinline unsigned int getModCounter () const { - return modCounter_; - } - - /*! for triangle meshes and bezier curves only */ - public: - - - /*! Sets ray mask. */ - virtual void setMask(unsigned mask) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Sets specified buffer. */ - virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Gets specified buffer. */ - virtual void* getBuffer(RTCBufferType type, unsigned int slot) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set intersection filter function for ray packets of size N. */ - virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN); - - /*! Set occlusion filter function for ray packets of size N. */ - virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN); - - /*! for instances only */ - public: - - /*! Sets the instanced scene */ - virtual void setInstancedScene(const Ref<Scene>& scene) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Sets transformation of the instance */ - virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Sets transformation of the instance */ - virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Returns the transformation of the instance */ - virtual AffineSpace3fa getTransform(float time) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! for user geometries only */ - public: - - /*! Set bounds function. */ - virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set intersect function for ray packets of size N. */ - virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set occlusion function for ray packets of size N. */ - virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); - } - - /*! Set point query function. */ - void setPointQueryFunction(RTCPointQueryFunction func); - - /*! returns number of time segments */ - __forceinline unsigned numTimeSegments () const { - return numTimeSteps-1; - } - - public: - - virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry"); - } - - virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry"); - } - - virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry"); - } - - virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry"); - } - - virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry"); - } - - virtual Vec3fa computeDirection(unsigned int primID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry"); - } - - virtual Vec3fa computeDirection(unsigned int primID, size_t time) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry"); - } - - virtual BBox3fa vbounds(size_t primID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry"); - } - - virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry"); - } - - virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry"); - } - - virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry"); - } - - virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry"); - } - - virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const { - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry"); - } - - public: - __forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; } - __forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; } - - public: - Device* device; //!< device this geometry belongs to - - void* userPtr; //!< user pointer - unsigned int numPrimitives; //!< number of primitives of this geometry - - unsigned int numTimeSteps; //!< number of time steps - float fnumTimeSegments; //!< number of time segments (precalculation) - BBox1f time_range; //!< motion blur time range - - unsigned int mask; //!< for masking out geometry - unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified - - struct { - GType gtype : 8; //!< geometry type - GSubType gsubtype : 8; //!< geometry subtype - RTCBuildQuality quality : 3; //!< build quality for geometry - unsigned state : 2; - bool enabled : 1; //!< true if geometry is enabled - }; - - RTCFilterFunctionN intersectionFilterN; - RTCFilterFunctionN occlusionFilterN; - RTCPointQueryFunction pointQueryFunc; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/hit.h b/thirdparty/embree-aarch64/kernels/common/hit.h deleted file mode 100644 index 32a198cdfe..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/hit.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "ray.h" -#include "instance_stack.h" - -namespace embree -{ - /* Hit structure for K hits */ - template<int K> - struct HitK - { - /* Default construction does nothing */ - __forceinline HitK() {} - - /* Constructs a hit */ - __forceinline HitK(const RTCIntersectContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng) - : Ng(Ng), u(u), v(v), primID(primID), geomID(geomID) - { - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - instID[l] = RTC_INVALID_GEOMETRY_ID; - instance_id_stack::copy(context->instID, instID); - } - - /* Returns the size of the hit */ - static __forceinline size_t size() { return K; } - - public: - Vec3vf<K> Ng; // geometry normal - vfloat<K> u; // barycentric u coordinate of hit - vfloat<K> v; // barycentric v coordinate of hit - vuint<K> primID; // primitive ID - vuint<K> geomID; // geometry ID - vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID - }; - - /* Specialization for a single hit */ - template<> - struct __aligned(16) HitK<1> - { - /* Default construction does nothing */ - __forceinline HitK() {} - - /* Constructs a hit */ - __forceinline HitK(const RTCIntersectContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng) - : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID) - { - instance_id_stack::copy(context->instID, instID); - } - - /* Returns the size of the hit */ - static __forceinline size_t size() { return 1; } - - public: - Vec3<float> Ng; // geometry normal - float u; // barycentric u coordinate of hit - float v; // barycentric v coordinate of hit - unsigned int primID; // primitive ID - unsigned int geomID; // geometry ID - unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID - }; - - /* Shortcuts */ - typedef HitK<1> Hit; - typedef HitK<4> Hit4; - typedef HitK<8> Hit8; - typedef HitK<16> Hit16; - - /* Outputs hit to stream */ - template<int K> - __forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray) - { - cout << "{ " << embree_endl - << " Ng = " << ray.Ng << embree_endl - << " u = " << ray.u << embree_endl - << " v = " << ray.v << embree_endl - << " primID = " << ray.primID << embree_endl - << " geomID = " << ray.geomID << embree_endl - << " instID ="; - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - { - cout << " " << ray.instID[l]; - } - cout << embree_endl; - return cout << "}"; - } - - template<typename Hit> - __forceinline void copyHitToRay(RayHit& ray, const Hit& hit) - { - ray.Ng = hit.Ng; - ray.u = hit.u; - ray.v = hit.v; - ray.primID = hit.primID; - ray.geomID = hit.geomID; - instance_id_stack::copy(hit.instID, ray.instID); - } - - template<int K> - __forceinline void copyHitToRay(const vbool<K> &mask, RayHitK<K> &ray, const HitK<K> &hit) - { - vfloat<K>::storeu(mask,&ray.Ng.x, hit.Ng.x); - vfloat<K>::storeu(mask,&ray.Ng.y, hit.Ng.y); - vfloat<K>::storeu(mask,&ray.Ng.z, hit.Ng.z); - vfloat<K>::storeu(mask,&ray.u, hit.u); - vfloat<K>::storeu(mask,&ray.v, hit.v); - vuint<K>::storeu(mask,&ray.primID, hit.primID); - vuint<K>::storeu(mask,&ray.geomID, hit.geomID); - instance_id_stack::copy(hit.instID, ray.instID, mask); - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/instance_stack.h b/thirdparty/embree-aarch64/kernels/common/instance_stack.h deleted file mode 100644 index d7e3637f7b..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/instance_stack.h +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "rtcore.h" - -namespace embree { -namespace instance_id_stack { - -static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0, - "RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0."); - -/******************************************************************************* - * Instance ID stack manipulation. - * This is used from the instance intersector. - ******************************************************************************/ - -/* - * Push an instance to the stack. - */ -RTC_FORCEINLINE bool push(RTCIntersectContext* context, - unsigned instanceId) -{ -#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 - const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT; - /* We assert here because instances are silently dropped when the stack is full. - This might be quite hard to find in production. */ - assert(spaceAvailable); - if (likely(spaceAvailable)) - context->instID[context->instStackSize++] = instanceId; - return spaceAvailable; -#else - const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID); - assert(spaceAvailable); - if (likely(spaceAvailable)) - context->instID[0] = instanceId; - return spaceAvailable; -#endif -} - - -/* - * Pop the last instance pushed to the stack. - * Do not call on an empty stack. - */ -RTC_FORCEINLINE void pop(RTCIntersectContext* context) -{ - assert(context); -#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 - assert(context->instStackSize > 0); - context->instID[--context->instStackSize] = RTC_INVALID_GEOMETRY_ID; -#else - assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID); - context->instID[0] = RTC_INVALID_GEOMETRY_ID; -#endif -} - -/******************************************************************************* - * Optimized instance id stack copy. - * The copy() function at the bottom of this block will either copy full - * stacks or copy only until the last valid element has been copied, depending - * on RTC_MAX_INSTANCE_LEVEL_COUNT. - ******************************************************************************/ - -/* - * Plain array assignment. This works for scalar->scalar, - * scalar->vector, and vector->vector. - */ -template <class Src, class Tgt> -RTC_FORCEINLINE void level_copy(unsigned level, Src* src, Tgt* tgt) -{ - tgt[level] = src[level]; -} - -/* - * Masked SIMD vector->vector store. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask) -{ - vuint<K>::storeu(mask, tgt + level, src[level]); -} - -/* - * Masked scalar->SIMD vector store. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const vbool<K>& mask) -{ - vuint<K>::store(mask, tgt + level, src[level]); -} - -/* - * Indexed assign from vector to scalar. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, unsigned* tgt, const size_t& idx) -{ - tgt[level] = src[level][idx]; -} - -/* - * Indexed assign from scalar to vector. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const size_t& idx) -{ - tgt[level][idx] = src[level]; -} - -/* - * Indexed assign from vector to vector. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const size_t& i, const size_t& j) -{ - tgt[level][j] = src[level][i]; -} - -/* - * Check if the given stack level is valid. - * These are only used for large max stack sizes. - */ -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const size_t& /*i*/) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const vbool<K>& /*mask*/) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} - -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack) -{ - return any(stack[level] != RTC_INVALID_GEOMETRY_ID); -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const vbool<K>& mask) -{ - return any(mask & (stack[level] != RTC_INVALID_GEOMETRY_ID)); -} - -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i) -{ - return stack[level][i] != RTC_INVALID_GEOMETRY_ID; -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i, const size_t& /*j*/) -{ - return stack[level][i] != RTC_INVALID_GEOMETRY_ID; -} - -/* - * Copy an instance ID stack. - * - * This function automatically selects a LevelFunctor from the above Assign - * structs. - */ -template <class Src, class Tgt, class... Args> -RTC_FORCEINLINE void copy(Src src, Tgt tgt, Args&&... args) -{ -#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) - /* - * Avoid all loops for only one level. - */ - level_copy(0, src, tgt, std::forward<Args>(args)...); - -#elif (RTC_MAX_INSTANCE_LEVEL_COUNT <= 4) - /* - * It is faster to avoid the valid test for low level counts. - * Just copy the whole stack. - */ - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - level_copy(l, src, tgt, std::forward<Args>(args)...); - -#else - /* - * For general stack sizes, it pays off to test for validity. - */ - bool valid = true; - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT && valid; ++l) - { - level_copy(l, src, tgt, std::forward<Args>(args)...); - valid = level_valid(l, src, std::forward<Args>(args)...); - } -#endif -} - -} // namespace instance_id_stack -} // namespace embree - diff --git a/thirdparty/embree-aarch64/kernels/common/isa.h b/thirdparty/embree-aarch64/kernels/common/isa.h deleted file mode 100644 index 63fb8d3351..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/isa.h +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../../common/sys/platform.h" -#include "../../common/sys/sysinfo.h" - -namespace embree -{ -#define DEFINE_SYMBOL2(type,name) \ - typedef type (*name##Func)(); \ - name##Func name; - -#define DECLARE_SYMBOL2(type,name) \ - namespace sse2 { extern type name(); } \ - namespace sse42 { extern type name(); } \ - namespace avx { extern type name(); } \ - namespace avx2 { extern type name(); } \ - namespace avx512knl { extern type name(); } \ - namespace avx512skx { extern type name(); } \ - void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \ - type name##_error() { return type(name##_error2); } \ - type name##_zero() { return type(nullptr); } - -#define DECLARE_ISA_FUNCTION(type,symbol,args) \ - namespace sse2 { extern type symbol(args); } \ - namespace sse42 { extern type symbol(args); } \ - namespace avx { extern type symbol(args); } \ - namespace avx2 { extern type symbol(args); } \ - namespace avx512knl { extern type symbol(args); } \ - namespace avx512skx { extern type symbol(args); } \ - inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \ - typedef type (*symbol##Ty)(args); \ - -#define DEFINE_ISA_FUNCTION(type,symbol,args) \ - typedef type (*symbol##Func)(args); \ - symbol##Func symbol; - -#define ZERO_SYMBOL(features,intersector) \ - intersector = intersector##_zero; - -#define INIT_SYMBOL(features,intersector) \ - intersector = decltype(intersector)(intersector##_error); - -#define SELECT_SYMBOL_DEFAULT(features,intersector) \ - intersector = isa::intersector; - -#if defined(__SSE__) || defined(__ARM_NEON) -#if !defined(EMBREE_TARGET_SIMD4) -#define EMBREE_TARGET_SIMD4 -#endif -#endif - -#if defined(EMBREE_TARGET_SSE42) -#define SELECT_SYMBOL_SSE42(features,intersector) \ - if ((features & SSE42) == SSE42) intersector = sse42::intersector; -#else -#define SELECT_SYMBOL_SSE42(features,intersector) -#endif - -#if defined(EMBREE_TARGET_AVX) || defined(__AVX__) -#if !defined(EMBREE_TARGET_SIMD8) -#define EMBREE_TARGET_SIMD8 -#endif -#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target -#define SELECT_SYMBOL_AVX(features,intersector) \ - if ((features & ISA) == ISA) intersector = isa::intersector; -#else -#define SELECT_SYMBOL_AVX(features,intersector) \ - if ((features & AVX) == AVX) intersector = avx::intersector; -#endif -#else -#define SELECT_SYMBOL_AVX(features,intersector) -#endif - -#if defined(EMBREE_TARGET_AVX2) -#if !defined(EMBREE_TARGET_SIMD8) -#define EMBREE_TARGET_SIMD8 -#endif -#define SELECT_SYMBOL_AVX2(features,intersector) \ - if ((features & AVX2) == AVX2) intersector = avx2::intersector; -#else -#define SELECT_SYMBOL_AVX2(features,intersector) -#endif - -#if defined(EMBREE_TARGET_AVX512KNL) -#if !defined(EMBREE_TARGET_SIMD16) -#define EMBREE_TARGET_SIMD16 -#endif -#define SELECT_SYMBOL_AVX512KNL(features,intersector) \ - if ((features & AVX512KNL) == AVX512KNL) intersector = avx512knl::intersector; -#else -#define SELECT_SYMBOL_AVX512KNL(features,intersector) -#endif - -#if defined(EMBREE_TARGET_AVX512SKX) -#if !defined(EMBREE_TARGET_SIMD16) -#define EMBREE_TARGET_SIMD16 -#endif -#define SELECT_SYMBOL_AVX512SKX(features,intersector) \ - if ((features & AVX512SKX) == AVX512SKX) intersector = avx512skx::intersector; -#else -#define SELECT_SYMBOL_AVX512SKX(features,intersector) -#endif - -#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX512KNL_AVX512SKX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - ZERO_SYMBOL(features,intersector); \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ - SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,intersector) \ - INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); - -#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \ - SELECT_SYMBOL_SSE42(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); - - struct VerifyMultiTargetLinking { - static __noinline int getISA(int depth = 5) { - if (depth == 0) return ISA; - else return getISA(depth-1); - } - }; - namespace sse2 { int getISA(); }; - namespace sse42 { int getISA(); }; - namespace avx { int getISA(); }; - namespace avx2 { int getISA(); }; - namespace avx512knl { int getISA(); }; - namespace avx512skx { int getISA(); }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h b/thirdparty/embree-aarch64/kernels/common/motion_derivative.h deleted file mode 100644 index 82953f0e89..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h +++ /dev/null @@ -1,325 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../../common/math/affinespace.h" -#include "../../common/math/interval.h" - -#include <functional> - -namespace embree { - -#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f - -static void motion_derivative_coefficients(const float *p, float *coeff); - -struct MotionDerivativeCoefficients -{ - float theta; - float coeffs[3*8*7]; - - MotionDerivativeCoefficients() {} - - // xfm0 and xfm1 are interpret as quaternion decomposition - MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1) - { - // cosTheta of the two quaternions - const float cosTheta = min(1.f, max(-1.f, - xfm0.l.vx.w * xfm1.l.vx.w - + xfm0.l.vy.w * xfm1.l.vy.w - + xfm0.l.vz.w * xfm1.l.vz.w - + xfm0.p.w * xfm1.p.w)); - - theta = std::acos(cosTheta); - Vec4f qperp(xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w); - if (cosTheta < 0.995f) { - // compute perpendicular quaternion - qperp.x = xfm1.p.w - cosTheta * xfm0.p.w; - qperp.y = xfm1.l.vx.w - cosTheta * xfm0.l.vx.w; - qperp.z = xfm1.l.vy.w - cosTheta * xfm0.l.vy.w; - qperp.w = xfm1.l.vz.w - cosTheta * xfm0.l.vz.w; - qperp = normalize(qperp); - } - const float p[33] = { - theta, - xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z, // translation component of xfm0 - xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z, // translation component of xfm1 - xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w, // quaternion of xfm0 - qperp.x, qperp.y, qperp.z, qperp.w, - xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x, // scale/skew component of xfm0 - xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y, - xfm0.l.vz.z, xfm0.p.z, - xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x, // scale/skew component of xfm1 - xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y, - xfm1.l.vz.z, xfm1.p.z - }; - motion_derivative_coefficients(p, coeffs); - } -}; - -struct MotionDerivative -{ - float twoTheta; - float c[8]; - - MotionDerivative(MotionDerivativeCoefficients const& mdc, - int dim, Vec3fa const& p0, Vec3fa const& p1) - : twoTheta(2.f*mdc.theta) - { - const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z }; - for (int i = 0; i < 8; ++i) { - c[i] = 0; - for (int j = 0; j < 7; ++j) { - c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j]; - } - } - } - - template<typename T> - struct EvalMotionDerivative - { - MotionDerivative const& md; - float offset; - - EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {} - - T operator()(T const& time) const { - return md.c[0] + md.c[1] * time - + (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time) - + (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time) - + offset; - } - }; - - unsigned int findRoots( - Interval1f const& interval, - float offset, - float* roots, - unsigned int maxNumRoots) - { - unsigned int numRoots = 0; - EvalMotionDerivative<Interval1f> eval(*this, offset); - findRoots(eval, interval, numRoots, roots, maxNumRoots); - return numRoots; - } - - template<typename Eval> - static void findRoots( - - Eval const& eval, - Interval1f const& interval, - unsigned int& numRoots, - float* roots, - unsigned int maxNumRoots) - { - Interval1f range = eval(interval); - if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return; - - const float split = 0.5f * (interval.upper + interval.lower); - if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f) - { - // check if the root already exists - for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) { - if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON) - return; - } - if (numRoots < maxNumRoots) { - roots[numRoots++] = split; - } - if (numRoots > maxNumRoots) { - printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS - return; - } - return; - } - - findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots); - findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots); - } -}; - -/****************************************************************************** - * Code generated with sympy 1.4 * - * See http://www.sympy.org/ for more information. * - * * - * see * - * * - * scripts/generate_motion_derivative_coefficients.py * - * * - * for how this code is generated * - * * - ******************************************************************************/ -static void motion_derivative_coefficients(const float *p, float *coeff) -{ - coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27]; - coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24]; - coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25]; - coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26]; - coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15]; - coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16]; - coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17]; - coeff[7] = 0; - coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24]; - coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25]; - coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26]; - coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24]; - coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25]; - coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26]; - coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27]; - coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24]; - coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25]; - coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26]; - coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15]; - coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16]; - coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17]; - coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0]; - coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24]; - coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25]; - coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26]; - coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24]; - coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25]; - coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26]; - coeff[28] = 0; - coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0]; - coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0]; - coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0]; - coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0]; - coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0]; - coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0]; - coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27]; - coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24]; - coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25]; - coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26]; - coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15]; - coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16]; - coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17]; - coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0]; - coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24]; - coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25]; - coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26]; - coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24]; - coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25]; - coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26]; - coeff[49] = 0; - coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0]; - coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0]; - coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0]; - coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0]; - coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0]; - coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0]; - coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30]; - coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24]; - coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28]; - coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29]; - coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15]; - coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19]; - coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20]; - coeff[63] = 0; - coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24]; - coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28]; - coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29]; - coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24]; - coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28]; - coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29]; - coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30]; - coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24]; - coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28]; - coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29]; - coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15]; - coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19]; - coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20]; - coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0]; - coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24]; - coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28]; - coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29]; - coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24]; - coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28]; - coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29]; - coeff[84] = 0; - coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0]; - coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0]; - coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0]; - coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0]; - coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0]; - coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0]; - coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30]; - coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24]; - coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28]; - coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29]; - coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15]; - coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19]; - coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20]; - coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0]; - coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24]; - coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28]; - coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29]; - coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24]; - coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28]; - coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29]; - coeff[105] = 0; - coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0]; - coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0]; - coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0]; - coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0]; - coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0]; - coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0]; - coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32]; - coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24]; - coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28]; - coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31]; - coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15]; - coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19]; - coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22]; - coeff[119] = 0; - coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24]; - coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28]; - coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31]; - coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24]; - coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28]; - coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31]; - coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30]; - coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24]; - coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28]; - coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29]; - coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15]; - coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19]; - coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20]; - coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0]; - coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24]; - coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28]; - coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29]; - coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24]; - coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28]; - coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29]; - coeff[140] = 0; - coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0]; - coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0]; - coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0]; - coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0]; - coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0]; - coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0]; - coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32]; - coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24]; - coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28]; - coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31]; - coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15]; - coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19]; - coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22]; - coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0]; - coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24]; - coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28]; - coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31]; - coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24]; - coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28]; - coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31]; - coeff[161] = 0; - coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0]; - coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0]; - coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0]; - coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0]; - coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0]; - coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0]; -} - -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/common/point_query.h b/thirdparty/embree-aarch64/kernels/common/point_query.h deleted file mode 100644 index 27d158ca3a..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/point_query.h +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -namespace embree -{ - /* Point query structure for closest point query */ - template<int K> - struct RTC_ALIGN(16) PointQueryK - { - /* Default construction does nothing */ - __forceinline PointQueryK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero) - : p(p), time(time), radius(radius) {} - - /* Returns the size of the ray */ - static __forceinline size_t size() { return K; } - - /* Calculates if this is a valid ray that does not cause issues during traversal */ - __forceinline vbool<K> valid() const - { - const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vn = radius >= vfloat<K>(0); - const vbool<K> vf = abs(time) < vfloat<K>(inf); - return vx & vy & vz & vn & vf; - } - - __forceinline void get(PointQueryK<1>* ray) const; - __forceinline void get(size_t i, PointQueryK<1>& ray) const; - __forceinline void set(const PointQueryK<1>* ray); - __forceinline void set(size_t i, const PointQueryK<1>& ray); - - Vec3vf<K> p; // location of the query point - vfloat<K> time; // time for motion blur - vfloat<K> radius; // radius for the point query - }; - - /* Specialization for a single point query */ - template<> - struct RTC_ALIGN(16) PointQueryK<1> - { - /* Default construction does nothing */ - __forceinline PointQueryK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero) - : p(p), time(time), radius(radius) {} - - /* Calculates if this is a valid ray that does not cause issues during traversal */ - __forceinline bool valid() const { - return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf); - } - - Vec3f p; - float time; - float radius; - }; - - /* Converts point query packet to single point query */ - template<int K> - __forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const - { - for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose - { - query[i].p.x = p.x[i]; - query[i].p.y = p.y[i]; - query[i].p.z = p.z[i]; - query[i].time = time[i]; - query[i].radius = radius[i]; - } - } - - /* Extracts a single point query out of a point query packet*/ - template<int K> - __forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const - { - query.p.x = p.x[i]; - query.p.y = p.y[i]; - query.p.z = p.z[i]; - query.radius = radius[i]; - query.time = time[i]; - } - - /* Converts single point query to point query packet */ - template<int K> - __forceinline void PointQueryK<K>::set(const PointQueryK<1>* query) - { - for (size_t i = 0; i < K; i++) - { - p.x[i] = query[i].p.x; - p.y[i] = query[i].p.y; - p.z[i] = query[i].p.z; - radius[i] = query[i].radius; - time[i] = query[i].time; - } - } - - /* inserts a single point query into a point query packet element */ - template<int K> - __forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query) - { - p.x[i] = query.p.x; - p.y[i] = query.p.y; - p.z[i] = query.p.z; - radius[i] = query.radius; - time[i] = query.time; - } - - /* Shortcuts */ - typedef PointQueryK<1> PointQuery; - typedef PointQueryK<4> PointQuery4; - typedef PointQueryK<8> PointQuery8; - typedef PointQueryK<16> PointQuery16; - struct PointQueryN; - - /* Outputs point query to stream */ - template<int K> - __forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query) - { - cout << "{ " << embree_endl - << " p = " << query.p << embree_endl - << " r = " << query.radius << embree_endl - << " time = " << query.time << embree_endl - << "}"; - return cout; - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/primref.h b/thirdparty/embree-aarch64/kernels/common/primref.h deleted file mode 100644 index ce75c982bb..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/primref.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -namespace embree -{ - /*! A primitive reference stores the bounds of the primitive and its ID. */ - struct __aligned(32) PrimRef - { - __forceinline PrimRef () {} - -#if defined(__AVX__) - __forceinline PrimRef(const PrimRef& v) { - vfloat8::store((float*)this,vfloat8::load((float*)&v)); - } - __forceinline PrimRef& operator=(const PrimRef& v) { - vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this; - } -#endif - - __forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID) - { - lower = Vec3fx(bounds.lower, geomID); - upper = Vec3fx(bounds.upper, primID); - } - - __forceinline PrimRef (const BBox3fa& bounds, size_t id) - { -#if defined(__X86_64__) || defined(__aarch64__) - lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF)); - upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF)); -#else - lower = Vec3fx(bounds.lower, (unsigned)id); - upper = Vec3fx(bounds.upper, (unsigned)0); -#endif - } - - /*! calculates twice the center of the primitive */ - __forceinline const Vec3fa center2() const { - return lower+upper; - } - - /*! return the bounding box of the primitive */ - __forceinline const BBox3fa bounds() const { - return BBox3fa(lower,upper); - } - - /*! size for bin heuristic is 1 */ - __forceinline unsigned size() const { - return 1; - } - - /*! returns bounds and centroid used for binning */ - __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const - { - bounds_o = bounds(); - center_o = embree::center2(bounds_o); - } - - __forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!! - return lower.u; - } - __forceinline unsigned& primIDref() { // FIXME: remove !!!!!!! - return upper.u; - } - - /*! returns the geometry ID */ - __forceinline unsigned geomID() const { - return lower.a; - } - - /*! returns the primitive ID */ - __forceinline unsigned primID() const { - return upper.a; - } - - /*! returns an size_t sized ID */ - __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) - return size_t(lower.u) + (size_t(upper.u) << 32); -#else - return size_t(lower.u); -#endif - } - - /*! special function for operator< */ - __forceinline uint64_t ID64() const { - return (((uint64_t)primID()) << 32) + (uint64_t)geomID(); - } - - /*! allows sorting the primrefs by ID */ - friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) { - return p0.ID64() < p1.ID64(); - } - - /*! Outputs primitive reference to a stream. */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) { - return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }"; - } - - public: - Vec3fx lower; //!< lower bounds and geomID - Vec3fx upper; //!< upper bounds and primID - }; - - /*! fast exchange for PrimRefs */ - __forceinline void xchg(PrimRef& a, PrimRef& b) - { -#if defined(__AVX__) - const vfloat8 aa = vfloat8::load((float*)&a); - const vfloat8 bb = vfloat8::load((float*)&b); - vfloat8::store((float*)&a,bb); - vfloat8::store((float*)&b,aa); -#else - std::swap(a,b); -#endif - } - - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - /************************************************************************************/ - - struct SubGridBuildData { - unsigned short sx,sy; - unsigned int primID; - - __forceinline SubGridBuildData() {}; - __forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID) : sx(sx), sy(sy), primID(primID) {}; - - __forceinline size_t x() const { return (size_t)sx & 0x7fff; } - __forceinline size_t y() const { return (size_t)sy & 0x7fff; } - - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/primref_mb.h b/thirdparty/embree-aarch64/kernels/common/primref_mb.h deleted file mode 100644 index b6c1ad5712..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/primref_mb.h +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -#define MBLUR_BIN_LBBOX 1 - -namespace embree -{ -#if MBLUR_BIN_LBBOX - - /*! A primitive reference stores the bounds of the primitive and its ID. */ - struct PrimRefMB - { - typedef LBBox3fa BBox; - - __forceinline PrimRefMB () {} - - __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID) - : lbounds((LBBox3fx)lbounds_i), time_range(time_range) - { - assert(activeTimeSegments > 0); - lbounds.bounds0.lower.a = geomID; - lbounds.bounds0.upper.a = primID; - lbounds.bounds1.lower.a = activeTimeSegments; - lbounds.bounds1.upper.a = totalTimeSegments; - } - - __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id) - : lbounds((LBBox3fx)lbounds_i), time_range(time_range) - { - assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) - lbounds.bounds0.lower.a = id & 0xFFFFFFFF; - lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF; -#else - lbounds.bounds0.lower.a = id; - lbounds.bounds0.upper.a = 0; -#endif - lbounds.bounds1.lower.a = activeTimeSegments; - lbounds.bounds1.upper.a = totalTimeSegments; - } - - __forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id) - : lbounds((LBBox3fx)lbounds_i), time_range(time_range) - { - assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) - lbounds.bounds0.lower.u = id & 0xFFFFFFFF; - lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF; -#else - lbounds.bounds0.lower.u = id; - lbounds.bounds0.upper.u = 0; -#endif - lbounds.bounds1.lower.a = activeTimeSegments; - lbounds.bounds1.upper.a = totalTimeSegments; - } - - /*! returns bounds for binning */ - __forceinline LBBox3fa bounds() const { - return lbounds; - } - - /*! returns the number of time segments of this primref */ - __forceinline unsigned size() const { - return lbounds.bounds1.lower.a; - } - - __forceinline unsigned totalTimeSegments() const { - return lbounds.bounds1.upper.a; - } - - /* calculate overlapping time segment range */ - __forceinline range<int> timeSegmentRange(const BBox1f& range) const { - return getTimeSegmentRange(range,time_range,float(totalTimeSegments())); - } - - /* returns time that corresponds to time step */ - __forceinline float timeStep(const int i) const { - assert(i>=0 && i<=(int)totalTimeSegments()); - return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments()); - } - - /*! checks if time range overlaps */ - __forceinline bool time_range_overlap(const BBox1f& range) const - { - if (0.9999f*time_range.upper <= range.lower) return false; - if (1.0001f*time_range.lower >= range.upper) return false; - return true; - } - - /*! returns center for binning */ - __forceinline Vec3fa binCenter() const { - return center2(lbounds.interpolate(0.5f)); - } - - /*! returns bounds and centroid used for binning */ - __forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const - { - bounds_o = bounds(); - center_o = binCenter(); - } - - /*! returns the geometry ID */ - __forceinline unsigned geomID() const { - return lbounds.bounds0.lower.a; - } - - /*! returns the primitive ID */ - __forceinline unsigned primID() const { - return lbounds.bounds0.upper.a; - } - - /*! returns an size_t sized ID */ - __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) - return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32); -#else - return size_t(lbounds.bounds0.lower.u); -#endif - } - - /*! special function for operator< */ - __forceinline uint64_t ID64() const { - return (((uint64_t)primID()) << 32) + (uint64_t)geomID(); - } - - /*! allows sorting the primrefs by ID */ - friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) { - return p0.ID64() < p1.ID64(); - } - - /*! Outputs primitive reference to a stream. */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) { - return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }"; - } - - public: - LBBox3fx lbounds; - BBox1f time_range; // entire geometry time range - }; - -#else - - /*! A primitive reference stores the bounds of the primitive and its ID. */ - struct __aligned(16) PrimRefMB - { - typedef BBox3fa BBox; - - __forceinline PrimRefMB () {} - - __forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID) - : bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range) - { - assert(activeTimeSegments > 0); - bbox.lower.a = geomID; - bbox.upper.a = primID; - } - - __forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id) - : bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range) - { - assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) - bbox.lower.u = id & 0xFFFFFFFF; - bbox.upper.u = (id >> 32) & 0xFFFFFFFF; -#else - bbox.lower.u = id; - bbox.upper.u = 0; -#endif - } - - /*! returns bounds for binning */ - __forceinline BBox3fa bounds() const { - return bbox; - } - - /*! returns the number of time segments of this primref */ - __forceinline unsigned int size() const { - return _activeTimeSegments; - } - - __forceinline unsigned int totalTimeSegments() const { - return _totalTimeSegments; - } - - /* calculate overlapping time segment range */ - __forceinline range<int> timeSegmentRange(const BBox1f& range) const { - return getTimeSegmentRange(range,time_range,float(_totalTimeSegments)); - } - - /* returns time that corresponds to time step */ - __forceinline float timeStep(const int i) const { - assert(i>=0 && i<=(int)_totalTimeSegments); - return time_range.lower + time_range.size()*float(i)/float(_totalTimeSegments); - } - - /*! checks if time range overlaps */ - __forceinline bool time_range_overlap(const BBox1f& range) const - { - if (0.9999f*time_range.upper <= range.lower) return false; - if (1.0001f*time_range.lower >= range.upper) return false; - return true; - } - - /*! returns center for binning */ - __forceinline Vec3fa binCenter() const { - return center2(bounds()); - } - - /*! returns bounds and centroid used for binning */ - __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const - { - bounds_o = bounds(); - center_o = center2(bounds()); - } - - /*! returns the geometry ID */ - __forceinline unsigned int geomID() const { - return bbox.lower.a; - } - - /*! returns the primitive ID */ - __forceinline unsigned int primID() const { - return bbox.upper.a; - } - - /*! returns an size_t sized ID */ - __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) - return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32); -#else - return size_t(bbox.lower.u); -#endif - } - - /*! special function for operator< */ - __forceinline uint64_t ID64() const { - return (((uint64_t)primID()) << 32) + (uint64_t)geomID(); - } - - /*! allows sorting the primrefs by ID */ - friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) { - return p0.ID64() < p1.ID64(); - } - - /*! Outputs primitive reference to a stream. */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) { - return cout << "{ bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }"; - } - - public: - BBox3fa bbox; // bounds, geomID, primID - unsigned int _activeTimeSegments; - unsigned int _totalTimeSegments; - BBox1f time_range; // entire geometry time range - }; - -#endif -} diff --git a/thirdparty/embree-aarch64/kernels/common/profile.h b/thirdparty/embree-aarch64/kernels/common/profile.h deleted file mode 100644 index a7de36414d..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/profile.h +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -namespace embree -{ - /*! helper structure for the implementation of the profile functions below */ - struct ProfileTimer - { - static const size_t N = 20; - - ProfileTimer () {} - - ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0) - { - for (size_t i=0; i<N; i++) names[i] = nullptr; - for (size_t i=0; i<N; i++) dt_fst[i] = 0.0; - for (size_t i=0; i<N; i++) dt_min[i] = pos_inf; - for (size_t i=0; i<N; i++) dt_avg[i] = 0.0; - for (size_t i=0; i<N; i++) dt_max[i] = neg_inf; - } - - __forceinline void begin() - { - j=0; - t0 = tj = getSeconds(); - } - - __forceinline void end() { - absolute("total"); - i++; - } - - __forceinline void operator() (const char* name) { - relative(name); - } - - __forceinline void absolute (const char* name) - { - const double t1 = getSeconds(); - const double dt = t1-t0; - assert(names[j] == nullptr || names[j] == name); - names[j] = name; - if (i == 0) dt_fst[j] = dt; - if (i>=numSkip) { - dt_min[j] = min(dt_min[j],dt); - dt_avg[j] = dt_avg[j] + dt; - dt_max[j] = max(dt_max[j],dt); - } - j++; - maxJ = max(maxJ,j); - } - - __forceinline void relative (const char* name) - { - const double t1 = getSeconds(); - const double dt = t1-tj; - tj = t1; - assert(names[j] == nullptr || names[j] == name); - names[j] = name; - if (i == 0) dt_fst[j] = dt; - if (i>=numSkip) { - dt_min[j] = min(dt_min[j],dt); - dt_avg[j] = dt_avg[j] + dt; - dt_max[j] = max(dt_max[j],dt); - } - j++; - maxJ = max(maxJ,j); - } - - void print(size_t numElements) - { - for (size_t k=0; k<N; k++) - dt_avg[k] /= double(i-numSkip); - - printf(" profile [M/s]:\n"); - for (size_t j=0; j<maxJ; j++) - printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n", - names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6); - - printf(" profile [ms]:\n"); - for (size_t j=0; j<maxJ; j++) - printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n", - names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]); - } - - void print() - { - printf(" profile:\n"); - - for (size_t k=0; k<N; k++) - dt_avg[k] /= double(i-numSkip); - - for (size_t j=0; j<maxJ; j++) { - printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n", - names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]); - } - } - - double avg() { - return dt_avg[maxJ-1]/double(i-numSkip); - } - - private: - size_t i; - size_t j; - size_t maxJ; - size_t numSkip; - double t0; - double tj; - const char* names[N]; - double dt_fst[N]; - double dt_min[N]; - double dt_avg[N]; - double dt_max[N]; - }; - - /*! This function executes some code block multiple times and measured sections of it. - Use the following way: - - profile(1,10,1000,[&](ProfileTimer& timer) { - // code - timer("A"); - // code - timer("B"); - }); - */ - template<typename Closure> - void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure) - { - ProfileTimer timer(numSkip); - - for (size_t i=0; i<numSkip+numIter; i++) - { - timer.begin(); - closure(timer); - timer.end(); - } - timer.print(numElements); - } - - /*! similar as the function above, but the timer object comes externally */ - template<typename Closure> - void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure) - { - timer = ProfileTimer(numSkip); - - for (size_t i=0; i<numSkip+numIter; i++) - { - timer.begin(); - closure(timer); - timer.end(); - } - timer.print(numElements); - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/ray.h b/thirdparty/embree-aarch64/kernels/common/ray.h deleted file mode 100644 index 336d48942c..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/ray.h +++ /dev/null @@ -1,1517 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "instance_stack.h" - -// FIXME: if ray gets seperated into ray* and hit, uload4 needs to be adjusted - -namespace embree -{ - static const size_t MAX_INTERNAL_STREAM_SIZE = 32; - - /* Ray structure for K rays */ - template<int K> - struct RayK - { - /* Default construction does nothing */ - __forceinline RayK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir, - const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf, - const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0) - : org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {} - - /* Returns the size of the ray */ - static __forceinline size_t size() { return K; } - - /* Calculates if this is a valid ray that does not cause issues during traversal */ - __forceinline vbool<K> valid() const - { - const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf); - const vbool<K> vf = abs(tfar) <= vfloat<K>(inf); - return vx & vy & vz & vn & vf; - } - - __forceinline void get(RayK<1>* ray) const; - __forceinline void get(size_t i, RayK<1>& ray) const; - __forceinline void set(const RayK<1>* ray); - __forceinline void set(size_t i, const RayK<1>& ray); - - __forceinline void copy(size_t dest, size_t source); - - __forceinline vint<K> octant() const - { - return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) | - select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) | - select(dir.z < 0.0f, vint<K>(4), vint<K>(zero)); - } - - /* Ray data */ - Vec3vf<K> org; // ray origin - vfloat<K> _tnear; // start of ray segment - Vec3vf<K> dir; // ray direction - vfloat<K> _time; // time of this ray for motion blur - vfloat<K> tfar; // end of ray segment - vint<K> mask; // used to mask out objects during traversal - vint<K> id; - vint<K> flags; - - __forceinline vfloat<K>& tnear() { return _tnear; } - __forceinline vfloat<K>& time() { return _time; } - __forceinline const vfloat<K>& tnear() const { return _tnear; } - __forceinline const vfloat<K>& time() const { return _time; } - }; - - /* Ray+hit structure for K rays */ - template<int K> - struct RayHitK : RayK<K> - { - using RayK<K>::org; - using RayK<K>::_tnear; - using RayK<K>::dir; - using RayK<K>::_time; - using RayK<K>::tfar; - using RayK<K>::mask; - using RayK<K>::id; - using RayK<K>::flags; - - using RayK<K>::tnear; - using RayK<K>::time; - - /* Default construction does nothing */ - __forceinline RayHitK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir, - const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf, - const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0) - : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags), - geomID(RTC_INVALID_GEOMETRY_ID) - { - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - instID[l] = RTC_INVALID_GEOMETRY_ID; - } - - __forceinline RayHitK(const RayK<K>& ray) - : RayK<K>(ray), - geomID(RTC_INVALID_GEOMETRY_ID) - { - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - instID[l] = RTC_INVALID_GEOMETRY_ID; - } - - __forceinline RayHitK<K>& operator =(const RayK<K>& ray) - { - org = ray.org; - _tnear = ray._tnear; - dir = ray.dir; - _time = ray._time; - tfar = ray.tfar; - mask = ray.mask; - id = ray.id; - flags = ray.flags; - - geomID = RTC_INVALID_GEOMETRY_ID; - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - instID[l] = RTC_INVALID_GEOMETRY_ID; - - return *this; - } - - /* Calculates if the hit is valid */ - __forceinline void verifyHit(const vbool<K>& valid0) const - { - vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID); - const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf)); - const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE)); - const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE); - const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE); - const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE); - if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t"); - if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u"); - if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v"); - if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x"); - if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y"); - if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z"); - } - - __forceinline void get(RayHitK<1>* ray) const; - __forceinline void get(size_t i, RayHitK<1>& ray) const; - __forceinline void set(const RayHitK<1>* ray); - __forceinline void set(size_t i, const RayHitK<1>& ray); - - __forceinline void copy(size_t dest, size_t source); - - /* Hit data */ - Vec3vf<K> Ng; // geometry normal - vfloat<K> u; // barycentric u coordinate of hit - vfloat<K> v; // barycentric v coordinate of hit - vuint<K> primID; // primitive ID - vuint<K> geomID; // geometry ID - vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID - }; - - /* Specialization for a single ray */ - template<> - struct RayK<1> - { - /* Default construction does nothing */ - __forceinline RayK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0) - : org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {} - - /* Calculates if this is a valid ray that does not cause issues during traversal */ - __forceinline bool valid() const { - return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf); - } - - /* Ray data */ - Vec3ff org; // 3 floats for ray origin, 1 float for tnear - //float tnear; // start of ray segment - Vec3ff dir; // 3 floats for ray direction, 1 float for time - // float time; - float tfar; // end of ray segment - int mask; // used to mask out objects during traversal - int id; // ray ID - int flags; // ray flags - - __forceinline float& tnear() { return org.w; }; - __forceinline const float& tnear() const { return org.w; }; - - __forceinline float& time() { return dir.w; }; - __forceinline const float& time() const { return dir.w; }; - - }; - - template<> - struct RayHitK<1> : RayK<1> - { - /* Default construction does nothing */ - __forceinline RayHitK() {} - - /* Constructs a ray from origin, direction, and ray segment. Near - * has to be smaller than far */ - __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0) - : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags), - geomID(RTC_INVALID_GEOMETRY_ID) {} - - __forceinline RayHitK(const RayK<1>& ray) - : RayK<1>(ray), - geomID(RTC_INVALID_GEOMETRY_ID) {} - - __forceinline RayHitK<1>& operator =(const RayK<1>& ray) - { - org = ray.org; - dir = ray.dir; - tfar = ray.tfar; - mask = ray.mask; - id = ray.id; - flags = ray.flags; - - geomID = RTC_INVALID_GEOMETRY_ID; - - return *this; - } - - /* Calculates if the hit is valid */ - __forceinline void verifyHit() const - { - if (geomID == RTC_INVALID_GEOMETRY_ID) return; - const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf)); - const bool vu = (abs(u) <= FLT_LARGE); - const bool vv = (abs(u) <= FLT_LARGE); - const bool vnx = abs(Ng.x) <= FLT_LARGE; - const bool vny = abs(Ng.y) <= FLT_LARGE; - const bool vnz = abs(Ng.z) <= FLT_LARGE; - if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t"); - if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u"); - if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v"); - if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x"); - if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y"); - if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z"); - } - - /* Hit data */ - Vec3f Ng; // not normalized geometry normal - float u; // barycentric u coordinate of hit - float v; // barycentric v coordinate of hit - unsigned int primID; // primitive ID - unsigned int geomID; // geometry ID - unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID - }; - - /* Converts ray packet to single rays */ - template<int K> - __forceinline void RayK<K>::get(RayK<1>* ray) const - { - for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose - { - ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i]; - ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i]; - ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i]; - } - } - - template<int K> - __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const - { - // FIXME: use SIMD transpose - for (size_t i = 0; i < K; i++) - get(i, ray[i]); - } - - /* Extracts a single ray out of a ray packet*/ - template<int K> - __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const - { - ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i]; - ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i]; - ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i]; - } - - template<int K> - __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const - { - ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i]; - ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i]; - ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i]; - ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i]; - ray.u = u[i]; ray.v = v[i]; - ray.primID = primID[i]; ray.geomID = geomID[i]; - - instance_id_stack::copy(instID, ray.instID, i); - } - - /* Converts single rays to ray packet */ - template<int K> - __forceinline void RayK<K>::set(const RayK<1>* ray) - { - // FIXME: use SIMD transpose - for (size_t i = 0; i < K; i++) - set(i, ray[i]); - } - - template<int K> - __forceinline void RayHitK<K>::set(const RayHitK<1>* ray) - { - // FIXME: use SIMD transpose - for (size_t i = 0; i < K; i++) - set(i, ray[i]); - } - - /* inserts a single ray into a ray packet element */ - template<int K> - __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray) - { - org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear(); - dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time(); - tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags; - } - - template<int K> - __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray) - { - org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear(); - dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time(); - tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags; - Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z; - u[i] = ray.u; v[i] = ray.v; - primID[i] = ray.primID; geomID[i] = ray.geomID; - - instance_id_stack::copy(ray.instID, instID, i); - } - - /* copies a ray packet element into another element*/ - template<int K> - __forceinline void RayK<K>::copy(size_t dest, size_t source) - { - org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source]; - dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source]; - tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source]; - } - - template<int K> - __forceinline void RayHitK<K>::copy(size_t dest, size_t source) - { - org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source]; - dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source]; - tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source]; - Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source]; - u[dest] = u[source]; v[dest] = v[source]; - primID[dest] = primID[source]; geomID[dest] = geomID[source]; - - instance_id_stack::copy(instID, instID, source, dest); - } - - /* Shortcuts */ - typedef RayK<1> Ray; - typedef RayK<4> Ray4; - typedef RayK<8> Ray8; - typedef RayK<16> Ray16; - struct RayN; - - typedef RayHitK<1> RayHit; - typedef RayHitK<4> RayHit4; - typedef RayHitK<8> RayHit8; - typedef RayHitK<16> RayHit16; - struct RayHitN; - - template<int K, bool intersect> - struct RayTypeHelper; - - template<int K> - struct RayTypeHelper<K, true> - { - typedef RayHitK<K> Ty; - }; - - template<int K> - struct RayTypeHelper<K, false> - { - typedef RayK<K> Ty; - }; - - template<bool intersect> - using RayType = typename RayTypeHelper<1, intersect>::Ty; - - template<int K, bool intersect> - using RayTypeK = typename RayTypeHelper<K, intersect>::Ty; - - /* Outputs ray to stream */ - template<int K> - __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray) - { - return cout << "{ " << embree_endl - << " org = " << ray.org << embree_endl - << " dir = " << ray.dir << embree_endl - << " near = " << ray.tnear() << embree_endl - << " far = " << ray.tfar << embree_endl - << " time = " << ray.time() << embree_endl - << " mask = " << ray.mask << embree_endl - << " id = " << ray.id << embree_endl - << " flags = " << ray.flags << embree_endl - << "}"; - } - - template<int K> - __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray) - { - cout << "{ " << embree_endl - << " org = " << ray.org << embree_endl - << " dir = " << ray.dir << embree_endl - << " near = " << ray.tnear() << embree_endl - << " far = " << ray.tfar << embree_endl - << " time = " << ray.time() << embree_endl - << " mask = " << ray.mask << embree_endl - << " id = " << ray.id << embree_endl - << " flags = " << ray.flags << embree_endl - << " Ng = " << ray.Ng - << " u = " << ray.u << embree_endl - << " v = " << ray.v << embree_endl - << " primID = " << ray.primID << embree_endl - << " geomID = " << ray.geomID << embree_endl - << " instID ="; - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - { - cout << " " << ray.instID[l]; - } - cout << embree_endl; - return cout << "}"; - } - - struct RayStreamSOA - { - __forceinline RayStreamSOA(void* rays, size_t N) - : ptr((char*)rays), N(N) {} - - /* ray data access functions */ - __forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin - __forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin - __forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin - __forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment - - __forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction - __forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction - __forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction - __forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur - - __forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance) - __forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional) - __forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id - __forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags - - /* hit data access functions */ - __forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal - __forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal - __forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal - - __forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit - __forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit - - __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID - __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID - __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID - - __forceinline Ray getRayByOffset(size_t offset) - { - Ray ray; - ray.org.x = org_x(offset)[0]; - ray.org.y = org_y(offset)[0]; - ray.org.z = org_z(offset)[0]; - ray.tnear() = tnear(offset)[0]; - ray.dir.x = dir_x(offset)[0]; - ray.dir.y = dir_y(offset)[0]; - ray.dir.z = dir_z(offset)[0]; - ray.time() = time(offset)[0]; - ray.tfar = tfar(offset)[0]; - ray.mask = mask(offset)[0]; - ray.id = id(offset)[0]; - ray.flags = flags(offset)[0]; - return ray; - } - - template<int K> - __forceinline RayK<K> getRayByOffset(size_t offset) - { - RayK<K> ray; - ray.org.x = vfloat<K>::loadu(org_x(offset)); - ray.org.y = vfloat<K>::loadu(org_y(offset)); - ray.org.z = vfloat<K>::loadu(org_z(offset)); - ray.tnear = vfloat<K>::loadu(tnear(offset)); - ray.dir.x = vfloat<K>::loadu(dir_x(offset)); - ray.dir.y = vfloat<K>::loadu(dir_y(offset)); - ray.dir.z = vfloat<K>::loadu(dir_z(offset)); - ray.time = vfloat<K>::loadu(time(offset)); - ray.tfar = vfloat<K>::loadu(tfar(offset)); - ray.mask = vint<K>::loadu(mask(offset)); - ray.id = vint<K>::loadu(id(offset)); - ray.flags = vint<K>::loadu(flags(offset)); - return ray; - } - - template<int K> - __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset) - { - RayK<K> ray; - ray.org.x = vfloat<K>::loadu(valid, org_x(offset)); - ray.org.y = vfloat<K>::loadu(valid, org_y(offset)); - ray.org.z = vfloat<K>::loadu(valid, org_z(offset)); - ray.tnear() = vfloat<K>::loadu(valid, tnear(offset)); - ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset)); - ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset)); - ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset)); - ray.time() = vfloat<K>::loadu(valid, time(offset)); - ray.tfar = vfloat<K>::loadu(valid, tfar(offset)); - -#if !defined(__AVX__) - /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults, - because the SSE masked loads always access the entire vector */ - if (unlikely(!all(valid))) - { - ray.mask = zero; - ray.id = zero; - ray.flags = zero; - - for (size_t k = 0; k < K; k++) - { - if (likely(valid[k])) - { - ray.mask[k] = mask(offset)[k]; - ray.id[k] = id(offset)[k]; - ray.flags[k] = flags(offset)[k]; - } - } - } - else -#endif - { - ray.mask = vint<K>::loadu(valid, mask(offset)); - ray.id = vint<K>::loadu(valid, id(offset)); - ray.flags = vint<K>::loadu(valid, flags(offset)); - } - - return ray; - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray) - { - /* - * valid_i: stores which of the input rays exist (do not access nonexistent rays!) - * valid: stores which of the rays actually hit something. - */ - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { - vfloat<K>::storeu(valid, tfar(offset), ray.tfar); - vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x); - vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y); - vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z); - vfloat<K>::storeu(valid, u(offset), ray.u); - vfloat<K>::storeu(valid, v(offset), ray.v); - -#if !defined(__AVX__) - /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults, - because the SSE masked stores always access the entire vector */ - if (unlikely(!all(valid_i))) - { - for (size_t k = 0; k < K; k++) - { - if (likely(valid[k])) - { - primID(offset)[k] = ray.primID[k]; - geomID(offset)[k] = ray.geomID[k]; - - instID(0, offset)[k] = ray.instID[0][k]; -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) - instID(l, offset)[k] = ray.instID[l][k]; -#endif - } - } - } - else -#endif - { - vuint<K>::storeu(valid, primID(offset), ray.primID); - vuint<K>::storeu(valid, geomID(offset), ray.geomID); - - vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]); -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) - vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]); -#endif - } - } - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - vfloat<K>::storeu(valid, tfar(offset), ray.tfar); - } - - __forceinline size_t getOctantByOffset(size_t offset) - { - const float dx = dir_x(offset)[0]; - const float dy = dir_y(offset)[0]; - const float dz = dir_z(offset)[0]; - const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0); - return octantID; - } - - __forceinline bool isValidByOffset(size_t offset) - { - const float nnear = tnear(offset)[0]; - const float ffar = tfar(offset)[0]; - return nnear <= ffar; - } - - template<int K> - __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset) - { - RayK<K> ray; - -#if defined(__AVX2__) - ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset); - ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset); - ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset); - ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset); - ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset); - ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset); - ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset); - ray.time() = vfloat<K>::template gather<1>(valid, time(), offset); - ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset); - ray.mask = vint<K>::template gather<1>(valid, mask(), offset); - ray.id = vint<K>::template gather<1>(valid, id(), offset); - ray.flags = vint<K>::template gather<1>(valid, flags(), offset); -#else - ray.org = zero; - ray.tnear() = zero; - ray.dir = zero; - ray.time() = zero; - ray.tfar = zero; - ray.mask = zero; - ray.id = zero; - ray.flags = zero; - - for (size_t k = 0; k < K; k++) - { - if (likely(valid[k])) - { - const size_t ofs = offset[k]; - - ray.org.x[k] = *org_x(ofs); - ray.org.y[k] = *org_y(ofs); - ray.org.z[k] = *org_z(ofs); - ray.tnear()[k] = *tnear(ofs); - ray.dir.x[k] = *dir_x(ofs); - ray.dir.y[k] = *dir_y(ofs); - ray.dir.z[k] = *dir_z(ofs); - ray.time()[k] = *time(ofs); - ray.tfar[k] = *tfar(ofs); - ray.mask[k] = *mask(ofs); - ray.id[k] = *id(ofs); - ray.flags[k] = *flags(ofs); - } - } -#endif - - return ray; - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar); - vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x); - vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y); - vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z); - vfloat<K>::template scatter<1>(valid, u(), offset, ray.u); - vfloat<K>::template scatter<1>(valid, v(), offset, ray.v); - vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID); - vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID); - - vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]); -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) - vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]); -#endif -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - const size_t ofs = offset[k]; - - *tfar(ofs) = ray.tfar[k]; - - *Ng_x(ofs) = ray.Ng.x[k]; - *Ng_y(ofs) = ray.Ng.y[k]; - *Ng_z(ofs) = ray.Ng.z[k]; - *u(ofs) = ray.u[k]; - *v(ofs) = ray.v[k]; - *primID(ofs) = ray.primID[k]; - *geomID(ofs) = ray.geomID[k]; - - *instID(0, ofs) = ray.instID[0][k]; -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) - *instID(l, ofs) = ray.instID[l][k]; -#endif - } -#endif - } - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar); -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - const size_t ofs = offset[k]; - - *tfar(ofs) = ray.tfar[k]; - } -#endif - } - } - - char* __restrict__ ptr; - size_t N; - }; - - template<size_t MAX_K> - struct StackRayStreamSOA : public RayStreamSOA - { - __forceinline StackRayStreamSOA(size_t K) - : RayStreamSOA(data, K) { assert(K <= MAX_K); } - - char data[MAX_K / 4 * sizeof(RayHit4)]; - }; - - - struct RayStreamSOP - { - template<class T> - __forceinline void init(T& t) - { - org_x = (float*)&t.org.x; - org_y = (float*)&t.org.y; - org_z = (float*)&t.org.z; - tnear = (float*)&t.tnear; - dir_x = (float*)&t.dir.x; - dir_y = (float*)&t.dir.y; - dir_z = (float*)&t.dir.z; - time = (float*)&t.time; - tfar = (float*)&t.tfar; - mask = (unsigned int*)&t.mask; - id = (unsigned int*)&t.id; - flags = (unsigned int*)&t.flags; - - Ng_x = (float*)&t.Ng.x; - Ng_y = (float*)&t.Ng.y; - Ng_z = (float*)&t.Ng.z; - u = (float*)&t.u; - v = (float*)&t.v; - primID = (unsigned int*)&t.primID; - geomID = (unsigned int*)&t.geomID; - - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - instID[l] = (unsigned int*)&t.instID[l]; - } - - __forceinline Ray getRayByOffset(size_t offset) - { - Ray ray; - ray.org.x = *(float* __restrict__)((char*)org_x + offset); - ray.org.y = *(float* __restrict__)((char*)org_y + offset); - ray.org.z = *(float* __restrict__)((char*)org_z + offset); - ray.dir.x = *(float* __restrict__)((char*)dir_x + offset); - ray.dir.y = *(float* __restrict__)((char*)dir_y + offset); - ray.dir.z = *(float* __restrict__)((char*)dir_z + offset); - ray.tfar = *(float* __restrict__)((char*)tfar + offset); - ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f; - ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f; - ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1; - ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1; - ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1; - return ray; - } - - template<int K> - __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset) - { - RayK<K> ray; - ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset)); - ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset)); - ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset)); - ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset)); - ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset)); - ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset)); - ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset)); - ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f; - ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f; - ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1; - ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1; - ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1; - return ray; - } - - template<int K> - __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset) - { - Vec3vf<K> dir; - dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset)); - dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset)); - dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset)); - return dir; - } - - __forceinline void setHitByOffset(size_t offset, const RayHit& ray) - { - if (ray.geomID != RTC_INVALID_GEOMETRY_ID) - { - *(float* __restrict__)((char*)tfar + offset) = ray.tfar; - - if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x; - if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y; - if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z; - *(float* __restrict__)((char*)u + offset) = ray.u; - *(float* __restrict__)((char*)v + offset) = ray.v; - *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID; - *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID; - - if (likely(instID[0])) { - *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0]; -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l) - *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l]; -#endif - } - } - } - - __forceinline void setHitByOffset(size_t offset, const Ray& ray) - { - *(float* __restrict__)((char*)tfar + offset) = ray.tfar; - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { - vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar); - - if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x); - if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y); - if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z); - vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u); - vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v); - vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID); - vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID); - - if (likely(instID[0])) { - vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]); -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) - vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]); -#endif - } - } - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar); - } - - __forceinline size_t getOctantByOffset(size_t offset) - { - const float dx = *(float* __restrict__)((char*)dir_x + offset); - const float dy = *(float* __restrict__)((char*)dir_y + offset); - const float dz = *(float* __restrict__)((char*)dir_z + offset); - const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0); - return octantID; - } - - __forceinline bool isValidByOffset(size_t offset) - { - const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f; - const float ffar = *(float* __restrict__)((char*)tfar + offset); - return nnear <= ffar; - } - - template<int K> - __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset) - { - const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f; - const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset)); - return nnear <= ffar; - } - - template<int K> - __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset) - { - RayK<K> ray; - -#if defined(__AVX2__) - ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset); - ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset); - ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset); - ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset); - ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset); - ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset); - ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset); - ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero); - ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero); - ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1); - ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1); - ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1); -#else - ray.org = zero; - ray.tnear() = zero; - ray.dir = zero; - ray.tfar = zero; - ray.time() = zero; - ray.mask = zero; - ray.id = zero; - ray.flags = zero; - - for (size_t k = 0; k < K; k++) - { - if (likely(valid[k])) - { - const size_t ofs = offset[k]; - - ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs); - ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs); - ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs); - ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs); - ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs); - ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs); - ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs); - ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f; - ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f; - ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1; - ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1; - ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1; - } - } -#endif - - return ray; - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar); - - if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x); - if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y); - if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z); - vfloat<K>::template scatter<1>(valid, u, offset, ray.u); - vfloat<K>::template scatter<1>(valid, v, offset, ray.v); - vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID); - vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID); - - if (likely(instID[0])) { - vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]); -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) - vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]); -#endif - } -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - const size_t ofs = offset[k]; - - *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k]; - - if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k]; - if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k]; - if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k]; - *(float* __restrict__)((char*)u + ofs) = ray.u[k]; - *(float* __restrict__)((char*)v + ofs) = ray.v[k]; - *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k]; - *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k]; - - if (likely(instID[0])) { - *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k]; -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) - *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k]; -#endif - } - } -#endif - } - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar); -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - const size_t ofs = offset[k]; - - *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k]; - } -#endif - } - } - - /* ray data */ - float* __restrict__ org_x; // x coordinate of ray origin - float* __restrict__ org_y; // y coordinate of ray origin - float* __restrict__ org_z; // z coordinate of ray origin - float* __restrict__ tnear; // start of ray segment (optional) - - float* __restrict__ dir_x; // x coordinate of ray direction - float* __restrict__ dir_y; // y coordinate of ray direction - float* __restrict__ dir_z; // z coordinate of ray direction - float* __restrict__ time; // time of this ray for motion blur (optional) - - float* __restrict__ tfar; // end of ray segment (set to hit distance) - unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional) - unsigned int* __restrict__ id; // ray ID - unsigned int* __restrict__ flags; // ray flags - - /* hit data */ - float* __restrict__ Ng_x; // x coordinate of geometry normal (optional) - float* __restrict__ Ng_y; // y coordinate of geometry normal (optional) - float* __restrict__ Ng_z; // z coordinate of geometry normal (optional) - - float* __restrict__ u; // barycentric u coordinate of hit - float* __restrict__ v; // barycentric v coordinate of hit - - unsigned int* __restrict__ primID; // primitive ID - unsigned int* __restrict__ geomID; // geometry ID - unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID (optional) - }; - - - struct RayStreamAOS - { - __forceinline RayStreamAOS(void* rays) - : ptr((Ray*)rays) {} - - __forceinline Ray& getRayByOffset(size_t offset) - { - return *(Ray*)((char*)ptr + offset); - } - - template<int K> - __forceinline RayK<K> getRayByOffset(const vint<K>& offset); - - template<int K> - __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset) - { - const vint<K> valid_offset = select(valid, offset, vintx(zero)); - return getRayByOffset(valid_offset); - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar); - vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x); - vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y); - vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z); - vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u); - vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v); - vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID); - vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID); - - vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]); -#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1) - for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) - vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]); -#endif -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]); - ray_k->tfar = ray.tfar[k]; - ray_k->Ng.x = ray.Ng.x[k]; - ray_k->Ng.y = ray.Ng.y[k]; - ray_k->Ng.z = ray.Ng.z[k]; - ray_k->u = ray.u[k]; - ray_k->v = ray.v[k]; - ray_k->primID = ray.primID[k]; - ray_k->geomID = ray.geomID[k]; - - instance_id_stack::copy(ray.instID, ray_k->instID, k); - } -#endif - } - } - - template<int K> - __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - { -#if defined(__AVX512F__) - vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar); -#else - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]); - ray_k->tfar = ray.tfar[k]; - } -#endif - } - } - - Ray* __restrict__ ptr; - }; - - template<> - __forceinline Ray4 RayStreamAOS::getRayByOffset(const vint4& offset) - { - Ray4 ray; - - /* load and transpose: org.x, org.y, org.z, tnear */ - const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org); - const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org); - const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org); - const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org); - - transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear()); - - /* load and transpose: dir.x, dir.y, dir.z, time */ - const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir); - const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir); - const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir); - const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir); - - transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar); - const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar); - const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar); - const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar); - - vfloat4 maskf, idf, flagsf; - transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf); - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } - -#if defined(__AVX__) - template<> - __forceinline Ray8 RayStreamAOS::getRayByOffset(const vint8& offset) - { - Ray8 ray; - - /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */ - const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[0]))->org); - const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[1]))->org); - const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[2]))->org); - const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[3]))->org); - const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[4]))->org); - const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[5]))->org); - const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[6]))->org); - const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[7]))->org); - - transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar); - const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar); - const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar); - const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar); - const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[4]))->tfar); - const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[5]))->tfar); - const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[6]))->tfar); - const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[7]))->tfar); - - vfloat8 maskf, idf, flagsf; - transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf); - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } -#endif - -#if defined(__AVX512F__) - template<> - __forceinline Ray16 RayStreamAOS::getRayByOffset(const vint16& offset) - { - Ray16 ray; - - /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */ - const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 0]))->org); - const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 1]))->org); - const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 2]))->org); - const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 3]))->org); - const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 4]))->org); - const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 5]))->org); - const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 6]))->org); - const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 7]))->org); - const vfloat8 ab8 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 8]))->org); - const vfloat8 ab9 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 9]))->org); - const vfloat8 ab10 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[10]))->org); - const vfloat8 ab11 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[11]))->org); - const vfloat8 ab12 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[12]))->org); - const vfloat8 ab13 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[13]))->org); - const vfloat8 ab14 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[14]))->org); - const vfloat8 ab15 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[15]))->org); - - transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15, - ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 0]))->tfar); - const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 1]))->tfar); - const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 2]))->tfar); - const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 3]))->tfar); - const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 4]))->tfar); - const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 5]))->tfar); - const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 6]))->tfar); - const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 7]))->tfar); - const vfloat4 c8 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 8]))->tfar); - const vfloat4 c9 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 9]))->tfar); - const vfloat4 c10 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[10]))->tfar); - const vfloat4 c11 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[11]))->tfar); - const vfloat4 c12 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[12]))->tfar); - const vfloat4 c13 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[13]))->tfar); - const vfloat4 c14 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[14]))->tfar); - const vfloat4 c15 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[15]))->tfar); - - vfloat16 maskf, idf, flagsf; - transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15, - ray.tfar, maskf, idf, flagsf); - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } -#endif - - - struct RayStreamAOP - { - __forceinline RayStreamAOP(void* rays) - : ptr((Ray**)rays) {} - - __forceinline Ray& getRayByIndex(size_t index) - { - return *ptr[index]; - } - - template<int K> - __forceinline RayK<K> getRayByIndex(const vint<K>& index); - - template<int K> - __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index) - { - const vint<K> valid_index = select(valid, index, vintx(zero)); - return getRayByIndex(valid_index); - } - - template<int K> - __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID); - - if (likely(any(valid))) - { - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]]; - - ray_k->tfar = ray.tfar[k]; - ray_k->Ng.x = ray.Ng.x[k]; - ray_k->Ng.y = ray.Ng.y[k]; - ray_k->Ng.z = ray.Ng.z[k]; - ray_k->u = ray.u[k]; - ray_k->v = ray.v[k]; - ray_k->primID = ray.primID[k]; - ray_k->geomID = ray.geomID[k]; - instance_id_stack::copy(ray.instID, ray_k->instID, k); - } - } - } - - template<int K> - __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray) - { - vbool<K> valid = valid_i; - valid &= (ray.tfar < 0.0f); - - if (likely(any(valid))) - { - size_t valid_bits = movemask(valid); - while (valid_bits != 0) - { - const size_t k = bscf(valid_bits); - Ray* __restrict__ ray_k = ptr[index[k]]; - - ray_k->tfar = ray.tfar[k]; - } - } - } - - Ray** __restrict__ ptr; - }; - - template<> - __forceinline Ray4 RayStreamAOP::getRayByIndex(const vint4& index) - { - Ray4 ray; - - /* load and transpose: org.x, org.y, org.z, tnear */ - const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org); - const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org); - const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org); - const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org); - - transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear()); - - /* load and transpose: dir.x, dir.y, dir.z, time */ - const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir); - const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir); - const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir); - const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir); - - transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar); - const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar); - const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar); - const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar); - - vfloat4 maskf, idf, flagsf; - transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf); - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } - -#if defined(__AVX__) - template<> - __forceinline Ray8 RayStreamAOP::getRayByIndex(const vint8& index) - { - Ray8 ray; - - /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */ - const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org); - const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org); - const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org); - const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org); - const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org); - const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org); - const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org); - const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org); - - transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar); - const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar); - const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar); - const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar); - const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar); - const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar); - const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar); - const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar); - - vfloat8 maskf, idf, flagsf; - transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf); - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } -#endif - -#if defined(__AVX512F__) - template<> - __forceinline Ray16 RayStreamAOP::getRayByIndex(const vint16& index) - { - Ray16 ray; - - /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */ - const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org); - const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org); - const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org); - const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org); - const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org); - const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org); - const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org); - const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org); - const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org); - const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org); - const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org); - const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org); - const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org); - const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org); - const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org); - const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org); - - transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15, - ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time()); - - /* load and transpose: tfar, mask, id, flags */ - const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar); - const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar); - const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar); - const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar); - const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar); - const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar); - const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar); - const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar); - const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar); - const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar); - const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar); - const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar); - const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar); - const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar); - const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar); - const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar); - - vfloat16 maskf, idf, flagsf; - transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15, - ray.tfar, maskf, idf, flagsf); - - ray.mask = asInt(maskf); - ray.id = asInt(idf); - ray.flags = asInt(flagsf); - - return ray; - } -#endif -} diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore.cpp deleted file mode 100644 index 625fbf6d4f..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp +++ /dev/null @@ -1,1799 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#define RTC_EXPORT_API - -#include "default.h" -#include "device.h" -#include "scene.h" -#include "context.h" -#include "../../include/embree3/rtcore_ray.h" - -#if defined(__aarch64__) && defined(BUILD_IOS) -#include <mutex> -#endif - -using namespace embree; - -RTC_NAMESPACE_BEGIN; - - /* mutex to make API thread safe */ -#if defined(__aarch64__) && defined(BUILD_IOS) - static std::mutex g_mutex; -#else - static MutexSys g_mutex; -#endif - - RTC_API RTCDevice rtcNewDevice(const char* config) - { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewDevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - Device* device = new Device(config); - return (RTCDevice) device->refInc(); - RTC_CATCH_END(nullptr); - return (RTCDevice) nullptr; - } - - RTC_API void rtcRetainDevice(RTCDevice hdevice) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcRetainDevice); - RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - device->refInc(); - RTC_CATCH_END(nullptr); - } - - RTC_API void rtcReleaseDevice(RTCDevice hdevice) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcReleaseDevice); - RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - device->refDec(); - RTC_CATCH_END(nullptr); - } - - RTC_API ssize_t rtcGetDeviceProperty(RTCDevice hdevice, RTCDeviceProperty prop) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetDeviceProperty); - RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - return device->getProperty(prop); - RTC_CATCH_END(device); - return 0; - } - - RTC_API void rtcSetDeviceProperty(RTCDevice hdevice, const RTCDeviceProperty prop, ssize_t val) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetDeviceProperty); - const bool internal_prop = (size_t)prop >= 1000000 && (size_t)prop < 1000004; - if (!internal_prop) RTC_VERIFY_HANDLE(hdevice); // allow NULL device for special internal settings -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - device->setProperty(prop,val); - RTC_CATCH_END(device); - } - - RTC_API RTCError rtcGetDeviceError(RTCDevice hdevice) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetDeviceError); - if (device == nullptr) return Device::getThreadErrorCode(); - else return device->getDeviceErrorCode(); - RTC_CATCH_END(device); - return RTC_ERROR_UNKNOWN; - } - - RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetDeviceErrorFunction); - RTC_VERIFY_HANDLE(hdevice); - device->setErrorFunction(error, userPtr); - RTC_CATCH_END(device); - } - - RTC_API void rtcSetDeviceMemoryMonitorFunction(RTCDevice hdevice, RTCMemoryMonitorFunction memoryMonitor, void* userPtr) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetDeviceMemoryMonitorFunction); - device->setMemoryMonitorFunction(memoryMonitor, userPtr); - RTC_CATCH_END(device); - } - - RTC_API RTCBuffer rtcNewBuffer(RTCDevice hdevice, size_t byteSize) - { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewBuffer); - RTC_VERIFY_HANDLE(hdevice); - Buffer* buffer = new Buffer((Device*)hdevice, byteSize); - return (RTCBuffer)buffer->refInc(); - RTC_CATCH_END((Device*)hdevice); - return nullptr; - } - - RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice hdevice, void* ptr, size_t byteSize) - { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewSharedBuffer); - RTC_VERIFY_HANDLE(hdevice); - Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr); - return (RTCBuffer)buffer->refInc(); - RTC_CATCH_END((Device*)hdevice); - return nullptr; - } - - RTC_API void* rtcGetBufferData(RTCBuffer hbuffer) - { - Buffer* buffer = (Buffer*)hbuffer; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetBufferData); - RTC_VERIFY_HANDLE(hbuffer); - return buffer->data(); - RTC_CATCH_END2(buffer); - return nullptr; - } - - RTC_API void rtcRetainBuffer(RTCBuffer hbuffer) - { - Buffer* buffer = (Buffer*)hbuffer; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcRetainBuffer); - RTC_VERIFY_HANDLE(hbuffer); - buffer->refInc(); - RTC_CATCH_END2(buffer); - } - - RTC_API void rtcReleaseBuffer(RTCBuffer hbuffer) - { - Buffer* buffer = (Buffer*)hbuffer; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcReleaseBuffer); - RTC_VERIFY_HANDLE(hbuffer); - buffer->refDec(); - RTC_CATCH_END2(buffer); - } - - RTC_API RTCScene rtcNewScene (RTCDevice hdevice) - { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewScene); - RTC_VERIFY_HANDLE(hdevice); - Scene* scene = new Scene((Device*)hdevice); - return (RTCScene) scene->refInc(); - RTC_CATCH_END((Device*)hdevice); - return nullptr; - } - - RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetSceneDevice); - RTC_VERIFY_HANDLE(hscene); - return (RTCDevice)scene->device->refInc(); // user will own one additional device reference - RTC_CATCH_END2(scene); - return (RTCDevice)nullptr; - } - - RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetSceneProgressMonitorFunction); - RTC_VERIFY_HANDLE(hscene); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else - Lock<MutexSys> lock(g_mutex); -#endif - scene->setProgressMonitorFunction(progress,ptr); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcSetSceneBuildQuality (RTCScene hscene, RTCBuildQuality quality) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetSceneBuildQuality); - RTC_VERIFY_HANDLE(hscene); - if (quality != RTC_BUILD_QUALITY_LOW && - quality != RTC_BUILD_QUALITY_MEDIUM && - quality != RTC_BUILD_QUALITY_HIGH) - // -- GODOT start -- - // throw std::runtime_error("invalid build quality"); - abort(); - // -- GODOT end -- - scene->setBuildQuality(quality); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcSetSceneFlags (RTCScene hscene, RTCSceneFlags flags) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetSceneFlags); - RTC_VERIFY_HANDLE(hscene); - scene->setSceneFlags(flags); - RTC_CATCH_END2(scene); - } - - RTC_API RTCSceneFlags rtcGetSceneFlags(RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetSceneFlags); - RTC_VERIFY_HANDLE(hscene); - return scene->getSceneFlags(); - RTC_CATCH_END2(scene); - return RTC_SCENE_FLAG_NONE; - } - - RTC_API void rtcCommitScene (RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcCommitScene); - RTC_VERIFY_HANDLE(hscene); - scene->commit(false); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcJoinCommitScene (RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcJoinCommitScene); - RTC_VERIFY_HANDLE(hscene); - scene->commit(true); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcGetSceneBounds(RTCScene hscene, RTCBounds* bounds_o) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetSceneBounds); - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - BBox3fa bounds = scene->bounds.bounds(); - bounds_o->lower_x = bounds.lower.x; - bounds_o->lower_y = bounds.lower.y; - bounds_o->lower_z = bounds.lower.z; - bounds_o->align0 = 0; - bounds_o->upper_x = bounds.upper.x; - bounds_o->upper_y = bounds.upper.y; - bounds_o->upper_z = bounds.upper.z; - bounds_o->align1 = 0; - RTC_CATCH_END2(scene); - } - - RTC_API void rtcGetSceneLinearBounds(RTCScene hscene, RTCLinearBounds* bounds_o) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetSceneBounds); - RTC_VERIFY_HANDLE(hscene); - if (bounds_o == nullptr) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid destination pointer"); - if (scene->isModified()) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - - bounds_o->bounds0.lower_x = scene->bounds.bounds0.lower.x; - bounds_o->bounds0.lower_y = scene->bounds.bounds0.lower.y; - bounds_o->bounds0.lower_z = scene->bounds.bounds0.lower.z; - bounds_o->bounds0.align0 = 0; - bounds_o->bounds0.upper_x = scene->bounds.bounds0.upper.x; - bounds_o->bounds0.upper_y = scene->bounds.bounds0.upper.y; - bounds_o->bounds0.upper_z = scene->bounds.bounds0.upper.z; - bounds_o->bounds0.align1 = 0; - bounds_o->bounds1.lower_x = scene->bounds.bounds1.lower.x; - bounds_o->bounds1.lower_y = scene->bounds.bounds1.lower.y; - bounds_o->bounds1.lower_z = scene->bounds.bounds1.lower.z; - bounds_o->bounds1.align0 = 0; - bounds_o->bounds1.upper_x = scene->bounds.bounds1.upper.x; - bounds_o->bounds1.upper_y = scene->bounds.bounds1.upper.y; - bounds_o->bounds1.upper_z = scene->bounds.bounds1.upper.z; - bounds_o->bounds1.align1 = 0; - RTC_CATCH_END2(scene); - } - - RTC_API void rtcCollide (RTCScene hscene0, RTCScene hscene1, RTCCollideFunc callback, void* userPtr) - { - Scene* scene0 = (Scene*) hscene0; - Scene* scene1 = (Scene*) hscene1; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcCollide); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene0); - RTC_VERIFY_HANDLE(hscene1); - if (scene0->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (scene1->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (scene0->device != scene1->device) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes are from different devices"); - auto nUserPrims0 = scene0->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false); - auto nUserPrims1 = scene1->getNumPrimitives (Geometry::MTY_USER_GEOMETRY, false); - if (scene0->numPrimitives() != nUserPrims0 && scene1->numPrimitives() != nUserPrims1) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scenes must only contain user geometries with a single timestep"); -#endif - scene0->intersectors.collide(scene0,scene1,callback,userPtr); - RTC_CATCH_END(scene0->device); - } - - inline bool pointQuery(Scene* scene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr) - { - bool changed = false; - if (userContext->instStackSize > 0) - { - const AffineSpace3fa transform = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]); - - float similarityScale = 0.f; - const bool similtude = similarityTransform(transform, &similarityScale); - assert((similtude && similarityScale > 0) || (!similtude && similarityScale == 0.f)); - - PointQuery query_inst; - query_inst.p = xfmPoint(transform, Vec3fa(query->x, query->y, query->z)); - query_inst.radius = query->radius * similarityScale; - query_inst.time = query->time; - - PointQueryContext context_inst(scene, (PointQuery*)query, - similtude ? POINT_QUERY_TYPE_SPHERE : POINT_QUERY_TYPE_AABB, - queryFunc, userContext, similarityScale, userPtr); - changed = scene->intersectors.pointQuery((PointQuery*)&query_inst, &context_inst); - } - else - { - PointQueryContext context(scene, (PointQuery*)query, - POINT_QUERY_TYPE_SPHERE, queryFunc, userContext, 1.f, userPtr); - changed = scene->intersectors.pointQuery((PointQuery*)query, &context); - } - return changed; - } - - RTC_API bool rtcPointQuery(RTCScene hscene, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcPointQuery); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - RTC_VERIFY_HANDLE(userContext); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes"); - if (((size_t)userContext) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "context not aligned to 16 bytes"); -#endif - - return pointQuery(scene, query, userContext, queryFunc, userPtr); - RTC_CATCH_END2_FALSE(scene); - } - - RTC_API bool rtcPointQuery4 (const int* valid, RTCScene hscene, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcPointQuery4); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes"); - if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(point_query.travs,cnt,cnt,cnt); - - bool changed = false; - PointQuery4* query4 = (PointQuery4*)query; - PointQuery query1; - for (size_t i=0; i<4; i++) { - if (!valid[i]) continue; - query4->get(i,query1); - changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL); - query4->set(i,query1); - } - return changed; - RTC_CATCH_END2_FALSE(scene); - } - - RTC_API bool rtcPointQuery8 (const int* valid, RTCScene hscene, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcPointQuery8); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes"); - if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(point_query.travs,cnt,cnt,cnt); - - bool changed = false; - PointQuery8* query8 = (PointQuery8*)query; - PointQuery query1; - for (size_t i=0; i<8; i++) { - if (!valid[i]) continue; - query8->get(i,query1); - changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL); - query8->set(i,query1); - } - return changed; - RTC_CATCH_END2_FALSE(scene); - } - - RTC_API bool rtcPointQuery16 (const int* valid, RTCScene hscene, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcPointQuery16); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene got not committed"); - if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes"); - if (((size_t)query) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "query not aligned to 16 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(point_query.travs,cnt,cnt,cnt); - - bool changed = false; - PointQuery16* query16 = (PointQuery16*)query; - PointQuery query1; - for (size_t i=0; i<16; i++) { - if (!valid[i]) continue; - PointQuery query1; query16->get(i,query1); - changed |= pointQuery(scene, (RTCPointQuery*)&query1, userContext, queryFunc, userPtrN?userPtrN[i]:NULL); - query16->set(i,query1); - } - return changed; - RTC_CATCH_END2_FALSE(scene); - } - - RTC_API void rtcIntersect1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect1); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes"); -#endif - STAT3(normal.travs,1,1,1); - IntersectContext context(scene,user_context); - scene->intersectors.intersect(*rayhit,&context); -#if defined(DEBUG) - ((RayHit*)rayhit)->verifyHit(); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersect4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit4* rayhit) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect4); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes"); - if (((size_t)rayhit) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 16 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(normal.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - RayHit4* rayhit4 = (RayHit4*)rayhit; - for (size_t i=0; i<4; i++) { - if (!valid[i]) continue; - RayHit ray1; rayhit4->get(i,ray1); - scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit4->set(i,ray1); - } -#else - scene->intersectors.intersect4(valid,*rayhit,&context); -#endif - - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersect8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit8* rayhit) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect8); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes"); - if (((size_t)rayhit) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 32 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(normal.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - RayHit8* rayhit8 = (RayHit8*) rayhit; - for (size_t i=0; i<8; i++) { - if (!valid[i]) continue; - RayHit ray1; rayhit8->get(i,ray1); - scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit8->set(i,ray1); - } -#else - if (likely(scene->intersectors.intersector8)) - scene->intersectors.intersect8(valid,*rayhit,&context); - else - scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,8,1,sizeof(RTCRayHit8),&context); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersect16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit16* rayhit) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect16); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes"); - if (((size_t)rayhit) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit not aligned to 64 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(normal.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - RayHit16* rayhit16 = (RayHit16*) rayhit; - for (size_t i=0; i<16; i++) { - if (!valid[i]) continue; - RayHit ray1; rayhit16->get(i,ray1); - scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit16->set(i,ray1); - } -#else - if (likely(scene->intersectors.intersector16)) - scene->intersectors.intersect16(valid,*rayhit,&context); - else - scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,16,1,sizeof(RTCRayHit16),&context); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersect1M (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit* rayhit, unsigned int M, size_t byteStride) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect1M); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)rayhit ) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(normal.travs,M,M,M); - IntersectContext context(scene,user_context); - - /* fast codepath for single rays */ - if (likely(M == 1)) { - if (likely(rayhit->ray.tnear <= rayhit->ray.tfar)) - scene->intersectors.intersect(*rayhit,&context); - } - - /* codepath for streams */ - else { - scene->device->rayStreamFilters.intersectAOS(scene,rayhit,M,byteStride,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1M not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersect1Mp (RTCScene hscene, RTCIntersectContext* user_context, RTCRayHit** rn, unsigned int M) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersect1Mp); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)rn) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(normal.travs,M,M,M); - IntersectContext context(scene,user_context); - - /* fast codepath for single rays */ - if (likely(M == 1)) { - if (likely(rn[0]->ray.tnear <= rn[0]->ray.tfar)) - scene->intersectors.intersect(*rn[0],&context); - } - - /* codepath for streams */ - else { - scene->device->rayStreamFilters.intersectAOP(scene,rn,M,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect1Mp not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersectNM (RTCScene hscene, RTCIntersectContext* user_context, struct RTCRayHitN* rayhit, unsigned int N, unsigned int M, size_t byteStride) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersectNM); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)rayhit) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(normal.travs,N*M,N*M,N*M); - IntersectContext context(scene,user_context); - - /* code path for single ray streams */ - if (likely(N == 1)) - { - /* fast code path for streams of size 1 */ - if (likely(M == 1)) { - if (likely(((RTCRayHit*)rayhit)->ray.tnear <= ((RTCRayHit*)rayhit)->ray.tfar)) - scene->intersectors.intersect(*(RTCRayHit*)rayhit,&context); - } - /* normal codepath for single ray streams */ - else { - scene->device->rayStreamFilters.intersectAOS(scene,(RTCRayHit*)rayhit,M,byteStride,&context); - } - } - /* code path for ray packet streams */ - else { - scene->device->rayStreamFilters.intersectSOA(scene,(char*)rayhit,N,M,byteStride,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNM not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcIntersectNp (RTCScene hscene, RTCIntersectContext* user_context, const RTCRayHitNp* rayhit, unsigned int N) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIntersectNp); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)rayhit->ray.org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_x not aligned to 4 bytes"); - if (((size_t)rayhit->ray.org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_y not aligned to 4 bytes"); - if (((size_t)rayhit->ray.org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.org_z not aligned to 4 bytes"); - if (((size_t)rayhit->ray.dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes"); - if (((size_t)rayhit->ray.dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_y not aligned to 4 bytes"); - if (((size_t)rayhit->ray.dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_z not aligned to 4 bytes"); - if (((size_t)rayhit->ray.tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.dir_x not aligned to 4 bytes"); - if (((size_t)rayhit->ray.tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.tnear not aligned to 4 bytes"); - if (((size_t)rayhit->ray.time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.time not aligned to 4 bytes"); - if (((size_t)rayhit->ray.mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->ray.mask not aligned to 4 bytes"); - if (((size_t)rayhit->hit.Ng_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_x not aligned to 4 bytes"); - if (((size_t)rayhit->hit.Ng_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_y not aligned to 4 bytes"); - if (((size_t)rayhit->hit.Ng_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.Ng_z not aligned to 4 bytes"); - if (((size_t)rayhit->hit.u ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.u not aligned to 4 bytes"); - if (((size_t)rayhit->hit.v ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.v not aligned to 4 bytes"); - if (((size_t)rayhit->hit.geomID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.geomID not aligned to 4 bytes"); - if (((size_t)rayhit->hit.primID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.primID not aligned to 4 bytes"); - if (((size_t)rayhit->hit.instID) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "rayhit->hit.instID not aligned to 4 bytes"); -#endif - STAT3(normal.travs,N,N,N); - IntersectContext context(scene,user_context); - scene->device->rayStreamFilters.intersectSOP(scene,rayhit,N,&context); -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectNp not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded1 (RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded1); - STAT3(shadow.travs,1,1,1); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes"); -#endif - IntersectContext context(scene,user_context); - scene->intersectors.occluded(*ray,&context); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded4 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay4* ray) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded4); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 16 bytes"); - if (((size_t)ray) & 0x0F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 16 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<4; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(shadow.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - Ray4* ray4 = (Ray4*) ray; - for (size_t i=0; i<4; i++) { - if (!valid[i]) continue; - Ray ray1; ray4->get(i,ray1); - scene->intersectors.occluded((RTCRay&)ray1,&context); - ray4->set(i,ray1); - } -#else - scene->intersectors.occluded4(valid,*ray,&context); -#endif - - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded8 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay8* ray) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded8); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 32 bytes"); - if (((size_t)ray) & 0x1F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 32 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<8; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(shadow.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - Ray8* ray8 = (Ray8*) ray; - for (size_t i=0; i<8; i++) { - if (!valid[i]) continue; - Ray ray1; ray8->get(i,ray1); - scene->intersectors.occluded((RTCRay&)ray1,&context); - ray8->set(i,ray1); - } -#else - if (likely(scene->intersectors.intersector8)) - scene->intersectors.occluded8(valid,*ray,&context); - else - scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,8,1,sizeof(RTCRay8),&context); -#endif - - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded16 (const int* valid, RTCScene hscene, RTCIntersectContext* user_context, RTCRay16* ray) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded16); - -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)valid) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 64 bytes"); - if (((size_t)ray) & 0x3F) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 64 bytes"); -#endif - STAT(size_t cnt=0; for (size_t i=0; i<16; i++) cnt += ((int*)valid)[i] == -1;); - STAT3(shadow.travs,cnt,cnt,cnt); - - IntersectContext context(scene,user_context); -#if !defined(EMBREE_RAY_PACKETS) - Ray16* ray16 = (Ray16*) ray; - for (size_t i=0; i<16; i++) { - if (!valid[i]) continue; - Ray ray1; ray16->get(i,ray1); - scene->intersectors.occluded((RTCRay&)ray1,&context); - ray16->set(i,ray1); - } -#else - if (likely(scene->intersectors.intersector16)) - scene->intersectors.occluded16(valid,*ray,&context); - else - scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,16,1,sizeof(RTCRay16),&context); -#endif - - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded1M(RTCScene hscene, RTCIntersectContext* user_context, RTCRay* ray, unsigned int M, size_t byteStride) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded1M); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(shadow.travs,M,M,M); - IntersectContext context(scene,user_context); - /* fast codepath for streams of size 1 */ - if (likely(M == 1)) { - if (likely(ray->tnear <= ray->tfar)) - scene->intersectors.occluded (*ray,&context); - } - /* codepath for normal streams */ - else { - scene->device->rayStreamFilters.occludedAOS(scene,ray,M,byteStride,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1M not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccluded1Mp(RTCScene hscene, RTCIntersectContext* user_context, RTCRay** ray, unsigned int M) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccluded1Mp); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(shadow.travs,M,M,M); - IntersectContext context(scene,user_context); - - /* fast codepath for streams of size 1 */ - if (likely(M == 1)) { - if (likely(ray[0]->tnear <= ray[0]->tfar)) - scene->intersectors.occluded (*ray[0],&context); - } - /* codepath for normal streams */ - else { - scene->device->rayStreamFilters.occludedAOP(scene,ray,M,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccluded1Mp not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccludedNM(RTCScene hscene, RTCIntersectContext* user_context, RTCRayN* ray, unsigned int N, unsigned int M, size_t byteStride) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccludedNM); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (byteStride < sizeof(RTCRayHit)) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"byteStride too small"); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)ray) & 0x03) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "ray not aligned to 4 bytes"); -#endif - STAT3(shadow.travs,N*M,N*N,N*N); - IntersectContext context(scene,user_context); - - /* codepath for single rays */ - if (likely(N == 1)) - { - /* fast path for streams of size 1 */ - if (likely(M == 1)) { - if (likely(((RTCRay*)ray)->tnear <= ((RTCRay*)ray)->tfar)) - scene->intersectors.occluded (*(RTCRay*)ray,&context); - } - /* codepath for normal ray streams */ - else { - scene->device->rayStreamFilters.occludedAOS(scene,(RTCRay*)ray,M,byteStride,&context); - } - } - /* code path for ray packet streams */ - else { - scene->device->rayStreamFilters.occludedSOA(scene,(char*)ray,N,M,byteStride,&context); - } -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNM not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcOccludedNp(RTCScene hscene, RTCIntersectContext* user_context, const RTCRayNp* ray, unsigned int N) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcOccludedNp); - -#if defined (EMBREE_RAY_PACKETS) -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - if (scene->isModified()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); - if (((size_t)ray->org_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_x not aligned to 4 bytes"); - if (((size_t)ray->org_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_y not aligned to 4 bytes"); - if (((size_t)ray->org_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "org_z not aligned to 4 bytes"); - if (((size_t)ray->dir_x ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes"); - if (((size_t)ray->dir_y ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_y not aligned to 4 bytes"); - if (((size_t)ray->dir_z ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_z not aligned to 4 bytes"); - if (((size_t)ray->tnear ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "dir_x not aligned to 4 bytes"); - if (((size_t)ray->tfar ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "tnear not aligned to 4 bytes"); - if (((size_t)ray->time ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "time not aligned to 4 bytes"); - if (((size_t)ray->mask ) & 0x03 ) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "mask not aligned to 4 bytes"); -#endif - STAT3(shadow.travs,N,N,N); - IntersectContext context(scene,user_context); - scene->device->rayStreamFilters.occludedSOP(scene,ray,N,&context); -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcOccludedNp not supported"); -#endif - RTC_CATCH_END2(scene); - } - - RTC_API void rtcRetainScene (RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcRetainScene); - RTC_VERIFY_HANDLE(hscene); - scene->refInc(); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcReleaseScene (RTCScene hscene) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcReleaseScene); - RTC_VERIFY_HANDLE(hscene); - scene->refDec(); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcSetGeometryInstancedScene(RTCGeometry hgeometry, RTCScene hscene) - { - Geometry* geometry = (Geometry*) hgeometry; - Ref<Scene> scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryInstancedScene); - RTC_VERIFY_HANDLE(hgeometry); - RTC_VERIFY_HANDLE(hscene); - geometry->setInstancedScene(scene); - RTC_CATCH_END2(geometry); - } - - AffineSpace3fa loadTransform(RTCFormat format, const float* xfm) - { - AffineSpace3fa space = one; - switch (format) - { - case RTC_FORMAT_FLOAT3X4_ROW_MAJOR: - space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 4], xfm[ 8]), - Vec3fa(xfm[ 1], xfm[ 5], xfm[ 9]), - Vec3fa(xfm[ 2], xfm[ 6], xfm[10]), - Vec3fa(xfm[ 3], xfm[ 7], xfm[11])); - break; - - case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR: - space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]), - Vec3fa(xfm[ 3], xfm[ 4], xfm[ 5]), - Vec3fa(xfm[ 6], xfm[ 7], xfm[ 8]), - Vec3fa(xfm[ 9], xfm[10], xfm[11])); - break; - - case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR: - space = AffineSpace3fa(Vec3fa(xfm[ 0], xfm[ 1], xfm[ 2]), - Vec3fa(xfm[ 4], xfm[ 5], xfm[ 6]), - Vec3fa(xfm[ 8], xfm[ 9], xfm[10]), - Vec3fa(xfm[12], xfm[13], xfm[14])); - break; - - default: - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format"); - break; - } - return space; - } - - void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm) - { - switch (format) - { - case RTC_FORMAT_FLOAT3X4_ROW_MAJOR: - xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vy.x; xfm[ 2] = space.l.vz.x; xfm[ 3] = space.p.x; - xfm[ 4] = space.l.vx.y; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vz.y; xfm[ 7] = space.p.y; - xfm[ 8] = space.l.vx.z; xfm[ 9] = space.l.vy.z; xfm[10] = space.l.vz.z; xfm[11] = space.p.z; - break; - - case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR: - xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z; - xfm[ 3] = space.l.vy.x; xfm[ 4] = space.l.vy.y; xfm[ 5] = space.l.vy.z; - xfm[ 6] = space.l.vz.x; xfm[ 7] = space.l.vz.y; xfm[ 8] = space.l.vz.z; - xfm[ 9] = space.p.x; xfm[10] = space.p.y; xfm[11] = space.p.z; - break; - - case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR: - xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z; xfm[ 3] = 0.f; - xfm[ 4] = space.l.vy.x; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vy.z; xfm[ 7] = 0.f; - xfm[ 8] = space.l.vz.x; xfm[ 9] = space.l.vz.y; xfm[10] = space.l.vz.z; xfm[11] = 0.f; - xfm[12] = space.p.x; xfm[13] = space.p.y; xfm[14] = space.p.z; xfm[15] = 1.f; - break; - - default: - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format"); - break; - } - } - - RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeStep, RTCFormat format, const void* xfm) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTransform); - RTC_VERIFY_HANDLE(hgeometry); - RTC_VERIFY_HANDLE(xfm); - const AffineSpace3fa transform = loadTransform(format, (const float*)xfm); - geometry->setTransform(transform, timeStep); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryTransformQuaternion(RTCGeometry hgeometry, unsigned int timeStep, const RTCQuaternionDecomposition* qd) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTransformQuaternion); - RTC_VERIFY_HANDLE(hgeometry); - RTC_VERIFY_HANDLE(qd); - - AffineSpace3fx transform; - transform.l.vx.x = qd->scale_x; - transform.l.vy.y = qd->scale_y; - transform.l.vz.z = qd->scale_z; - transform.l.vy.x = qd->skew_xy; - transform.l.vz.x = qd->skew_xz; - transform.l.vz.y = qd->skew_yz; - transform.l.vx.y = qd->translation_x; - transform.l.vx.z = qd->translation_y; - transform.l.vy.z = qd->translation_z; - transform.p.x = qd->shift_x; - transform.p.y = qd->shift_y; - transform.p.z = qd->shift_z; - - // normalize quaternion - Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k); - q = normalize(q); - transform.l.vx.w = q.i; - transform.l.vy.w = q.j; - transform.l.vz.w = q.k; - transform.p.w = q.r; - - geometry->setQuaternionDecomposition(transform, timeStep); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcGetGeometryTransform(RTCGeometry hgeometry, float time, RTCFormat format, void* xfm) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryTransform); - const AffineSpace3fa transform = geometry->getTransform(time); - storeTransform(transform, format, (float*)xfm); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args) - { - IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i; - args->report(args,filter_args); - } - - RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args) - { - OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i; - args->report(args,filter_args); - } - - RTC_API RTCGeometry rtcNewGeometry (RTCDevice hdevice, RTCGeometryType type) - { - Device* device = (Device*) hdevice; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewGeometry); - RTC_VERIFY_HANDLE(hdevice); - - switch (type) - { - case RTC_GEOMETRY_TYPE_TRIANGLE: - { -#if defined(EMBREE_GEOMETRY_TRIANGLE) - createTriangleMeshTy createTriangleMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createTriangleMesh); - Geometry* geom = createTriangleMesh(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_TRIANGLE is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_QUAD: - { -#if defined(EMBREE_GEOMETRY_QUAD) - createQuadMeshTy createQuadMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createQuadMesh); - Geometry* geom = createQuadMesh(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_QUAD is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_SPHERE_POINT: - case RTC_GEOMETRY_TYPE_DISC_POINT: - case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT: - { -#if defined(EMBREE_GEOMETRY_POINT) - createPointsTy createPoints = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_builder_cpu_features, createPoints); - - Geometry *geom; - switch(type) { - case RTC_GEOMETRY_TYPE_SPHERE_POINT: - geom = createPoints(device, Geometry::GTY_SPHERE_POINT); - break; - case RTC_GEOMETRY_TYPE_DISC_POINT: - geom = createPoints(device, Geometry::GTY_DISC_POINT); - break; - case RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT: - geom = createPoints(device, Geometry::GTY_ORIENTED_DISC_POINT); - break; - default: - geom = nullptr; - break; - } - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_POINT is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE: - case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE: - case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE: - - case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE: - case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE: - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE: - - case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE: - case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE: - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE: - - case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE: - case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE: - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE: - - case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE: - case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE: - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE: - { -#if defined(EMBREE_GEOMETRY_CURVE) - createLineSegmentsTy createLineSegments = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createLineSegments); - createCurvesTy createCurves = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createCurves); - - Geometry* geom; - switch (type) { - case RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_CONE_LINEAR_CURVE); break; - case RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ROUND_LINEAR_CURVE); break; - case RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_FLAT_LINEAR_CURVE); break; - //case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_LINEAR_CURVE : geom = createLineSegments (device,Geometry::GTY_ORIENTED_LINEAR_CURVE); break; - - case RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BEZIER_CURVE); break; - case RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BEZIER_CURVE); break; - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BEZIER_CURVE); break; - - case RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_BSPLINE_CURVE); break; - case RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_BSPLINE_CURVE); break; - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_BSPLINE_CURVE); break; - - case RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_HERMITE_CURVE); break; - case RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_HERMITE_CURVE); break; - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_HERMITE_CURVE); break; - - case RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ROUND_CATMULL_ROM_CURVE); break; - case RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_FLAT_CATMULL_ROM_CURVE); break; - case RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE : geom = createCurves(device,Geometry::GTY_ORIENTED_CATMULL_ROM_CURVE); break; - default: geom = nullptr; break; - } - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_CURVE is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_SUBDIVISION: - { -#if defined(EMBREE_GEOMETRY_SUBDIVISION) - createSubdivMeshTy createSubdivMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX(device->enabled_cpu_features,createSubdivMesh); - //SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createSubdivMesh); // FIXME: this does not work for some reason? - Geometry* geom = createSubdivMesh(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_SUBDIVISION is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_USER: - { -#if defined(EMBREE_GEOMETRY_USER) - createUserGeometryTy createUserGeometry = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createUserGeometry); - Geometry* geom = createUserGeometry(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_USER is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_INSTANCE: - { -#if defined(EMBREE_GEOMETRY_INSTANCE) - createInstanceTy createInstance = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createInstance); - Geometry* geom = createInstance(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_INSTANCE is not supported"); -#endif - } - - case RTC_GEOMETRY_TYPE_GRID: - { -#if defined(EMBREE_GEOMETRY_GRID) - createGridMeshTy createGridMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createGridMesh); - Geometry* geom = createGridMesh(device); - return (RTCGeometry) geom->refInc(); -#else - throw_RTCError(RTC_ERROR_UNKNOWN,"RTC_GEOMETRY_TYPE_GRID is not supported"); -#endif - } - - default: - throw_RTCError(RTC_ERROR_UNKNOWN,"invalid geometry type"); - } - - RTC_CATCH_END(device); - return nullptr; - } - - RTC_API void rtcSetGeometryUserPrimitiveCount(RTCGeometry hgeometry, unsigned int userPrimitiveCount) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryUserPrimitiveCount); - RTC_VERIFY_HANDLE(hgeometry); - - if (unlikely(geometry->getType() != Geometry::GTY_USER_GEOMETRY)) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation only allowed for user geometries"); - - geometry->setNumPrimitives(userPrimitiveCount); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryTimeStepCount(RTCGeometry hgeometry, unsigned int timeStepCount) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTimeStepCount); - RTC_VERIFY_HANDLE(hgeometry); - - if (timeStepCount > RTC_MAX_TIME_STEP_COUNT) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"number of time steps is out of range"); - - geometry->setNumTimeSteps(timeStepCount); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryTimeRange(RTCGeometry hgeometry, float startTime, float endTime) - { - Ref<Geometry> geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTimeRange); - RTC_VERIFY_HANDLE(hgeometry); - - if (startTime > endTime) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"startTime has to be smaller or equal to the endTime"); - - geometry->setTimeRange(BBox1f(startTime,endTime)); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryVertexAttributeCount(RTCGeometry hgeometry, unsigned int N) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryVertexAttributeCount); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setVertexAttributeCount(N); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryTopologyCount(RTCGeometry hgeometry, unsigned int N) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTopologyCount); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setTopologyCount(N); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryBuildQuality (RTCGeometry hgeometry, RTCBuildQuality quality) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryBuildQuality); - RTC_VERIFY_HANDLE(hgeometry); - if (quality != RTC_BUILD_QUALITY_LOW && - quality != RTC_BUILD_QUALITY_MEDIUM && - quality != RTC_BUILD_QUALITY_HIGH && - quality != RTC_BUILD_QUALITY_REFIT) - // -- GODOT start -- - // throw std::runtime_error("invalid build quality"); - abort(); - // -- GODOT end -- - geometry->setBuildQuality(quality); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryMaxRadiusScale(RTCGeometry hgeometry, float maxRadiusScale) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryMaxRadiusScale); - RTC_VERIFY_HANDLE(hgeometry); -#if RTC_MIN_WIDTH - if (maxRadiusScale < 1.0f) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximal radius scale has to be larger or equal to 1"); - geometry->setMaxRadiusScale(maxRadiusScale); -#else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"min-width feature is not enabled"); -#endif - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryMask (RTCGeometry hgeometry, unsigned int mask) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryMask); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setMask(mask); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometrySubdivisionMode (RTCGeometry hgeometry, unsigned topologyID, RTCSubdivisionMode mode) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometrySubdivisionMode); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setSubdivisionMode(topologyID,mode); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryVertexAttributeTopology(RTCGeometry hgeometry, unsigned int vertexAttributeID, unsigned int topologyID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryVertexAttributeTopology); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setVertexAttributeTopology(vertexAttributeID, topologyID); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, RTCBuffer hbuffer, size_t byteOffset, size_t byteStride, size_t itemCount) - { - Geometry* geometry = (Geometry*) hgeometry; - Ref<Buffer> buffer = (Buffer*)hbuffer; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryBuffer); - RTC_VERIFY_HANDLE(hgeometry); - RTC_VERIFY_HANDLE(hbuffer); - - if (geometry->device != buffer->device) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices"); - - if (itemCount > 0xFFFFFFFFu) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large"); - - geometry->setBuffer(type, slot, format, buffer, byteOffset, byteStride, (unsigned int)itemCount); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetSharedGeometryBuffer); - RTC_VERIFY_HANDLE(hgeometry); - - if (itemCount > 0xFFFFFFFFu) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large"); - - Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset); - geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount); - RTC_CATCH_END2(geometry); - } - - RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetNewGeometryBuffer); - RTC_VERIFY_HANDLE(hgeometry); - - if (itemCount > 0xFFFFFFFFu) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large"); - - /* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */ - size_t bytes = itemCount*byteStride; - if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) - bytes += (16 - (byteStride%16))%16; - - Ref<Buffer> buffer = new Buffer(geometry->device, bytes); - geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount); - return buffer->data(); - RTC_CATCH_END2(geometry); - return nullptr; - } - - RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryBufferData); - RTC_VERIFY_HANDLE(hgeometry); - return geometry->getBuffer(type, slot); - RTC_CATCH_END2(geometry); - return nullptr; - } - - RTC_API void rtcEnableGeometry (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcEnableGeometry); - RTC_VERIFY_HANDLE(hgeometry); - geometry->enable(); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcUpdateGeometryBuffer); - RTC_VERIFY_HANDLE(hgeometry); - geometry->updateBuffer(type, slot); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcDisableGeometry (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcDisableGeometry); - RTC_VERIFY_HANDLE(hgeometry); - geometry->disable(); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryTessellationRate (RTCGeometry hgeometry, float tessellationRate) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryTessellationRate); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setTessellationRate(tessellationRate); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryUserData (RTCGeometry hgeometry, void* ptr) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryUserData); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setUserData(ptr); - RTC_CATCH_END2(geometry); - } - - RTC_API void* rtcGetGeometryUserData (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; // no ref counting here! - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryUserData); - RTC_VERIFY_HANDLE(hgeometry); - return geometry->getUserData(); - RTC_CATCH_END2(geometry); - return nullptr; - } - - RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryBoundsFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setBoundsFunction(bounds,userPtr); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryDisplacementFunction (RTCGeometry hgeometry, RTCDisplacementFunctionN displacement) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryDisplacementFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setDisplacementFunction(displacement); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryIntersectFunction (RTCGeometry hgeometry, RTCIntersectFunctionN intersect) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryIntersectFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setIntersectFunctionN(intersect); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryPointQueryFunction(RTCGeometry hgeometry, RTCPointQueryFunction pointQuery) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryPointQueryFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setPointQueryFunction(pointQuery); - RTC_CATCH_END2(geometry); - } - - RTC_API unsigned int rtcGetGeometryFirstHalfEdge(RTCGeometry hgeometry, unsigned int faceID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryFirstHalfEdge); - return geometry->getFirstHalfEdge(faceID); - RTC_CATCH_END2(geometry); - return -1; - } - - RTC_API unsigned int rtcGetGeometryFace(RTCGeometry hgeometry, unsigned int edgeID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryFace); - return geometry->getFace(edgeID); - RTC_CATCH_END2(geometry); - return -1; - } - - RTC_API unsigned int rtcGetGeometryNextHalfEdge(RTCGeometry hgeometry, unsigned int edgeID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryNextHalfEdge); - return geometry->getNextHalfEdge(edgeID); - RTC_CATCH_END2(geometry); - return -1; - } - - RTC_API unsigned int rtcGetGeometryPreviousHalfEdge(RTCGeometry hgeometry, unsigned int edgeID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryPreviousHalfEdge); - return geometry->getPreviousHalfEdge(edgeID); - RTC_CATCH_END2(geometry); - return -1; - } - - RTC_API unsigned int rtcGetGeometryOppositeHalfEdge(RTCGeometry hgeometry, unsigned int topologyID, unsigned int edgeID) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometryOppositeHalfEdge); - return geometry->getOppositeHalfEdge(topologyID,edgeID); - RTC_CATCH_END2(geometry); - return -1; - } - - RTC_API void rtcSetGeometryOccludedFunction (RTCGeometry hgeometry, RTCOccludedFunctionN occluded) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetOccludedFunctionN); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setOccludedFunctionN(occluded); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryIntersectFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryIntersectFilterFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setIntersectionFilterFunctionN(filter); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcSetGeometryOccludedFilterFunction (RTCGeometry hgeometry, RTCFilterFunctionN filter) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSetGeometryOccludedFilterFunction); - RTC_VERIFY_HANDLE(hgeometry); - geometry->setOcclusionFilterFunctionN(filter); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcInterpolate(const RTCInterpolateArguments* const args) - { - Geometry* geometry = (Geometry*) args->geometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcInterpolate); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(args->geometry); -#endif - geometry->interpolate(args); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcInterpolateN(const RTCInterpolateNArguments* const args) - { - Geometry* geometry = (Geometry*) args->geometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcInterpolateN); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(args->geometry); -#endif - geometry->interpolateN(args); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcCommitGeometry (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcCommitGeometry); - RTC_VERIFY_HANDLE(hgeometry); - return geometry->commit(); - RTC_CATCH_END2(geometry); - } - - RTC_API unsigned int rtcAttachGeometry (RTCScene hscene, RTCGeometry hgeometry) - { - Scene* scene = (Scene*) hscene; - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcAttachGeometry); - RTC_VERIFY_HANDLE(hscene); - RTC_VERIFY_HANDLE(hgeometry); - if (scene->device != geometry->device) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices"); - return scene->bind(RTC_INVALID_GEOMETRY_ID,geometry); - RTC_CATCH_END2(scene); - return -1; - } - - RTC_API void rtcAttachGeometryByID (RTCScene hscene, RTCGeometry hgeometry, unsigned int geomID) - { - Scene* scene = (Scene*) hscene; - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcAttachGeometryByID); - RTC_VERIFY_HANDLE(hscene); - RTC_VERIFY_HANDLE(hgeometry); - RTC_VERIFY_GEOMID(geomID); - if (scene->device != geometry->device) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"inputs are from different devices"); - scene->bind(geomID,geometry); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcDetachGeometry (RTCScene hscene, unsigned int geomID) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcDetachGeometry); - RTC_VERIFY_HANDLE(hscene); - RTC_VERIFY_GEOMID(geomID); - scene->detachGeometry(geomID); - RTC_CATCH_END2(scene); - } - - RTC_API void rtcRetainGeometry (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcRetainGeometry); - RTC_VERIFY_HANDLE(hgeometry); - geometry->refInc(); - RTC_CATCH_END2(geometry); - } - - RTC_API void rtcReleaseGeometry (RTCGeometry hgeometry) - { - Geometry* geometry = (Geometry*) hgeometry; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcReleaseGeometry); - RTC_VERIFY_HANDLE(hgeometry); - geometry->refDec(); - RTC_CATCH_END2(geometry); - } - - RTC_API RTCGeometry rtcGetGeometry (RTCScene hscene, unsigned int geomID) - { - Scene* scene = (Scene*) hscene; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcGetGeometry); -#if defined(DEBUG) - RTC_VERIFY_HANDLE(hscene); - RTC_VERIFY_GEOMID(geomID); -#endif - return (RTCGeometry) scene->get(geomID); - RTC_CATCH_END2(scene); - return nullptr; - } - -RTC_NAMESPACE_END diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.h b/thirdparty/embree-aarch64/kernels/common/rtcore.h deleted file mode 100644 index 4b070e122b..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/rtcore.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../../include/embree3/rtcore.h" -RTC_NAMESPACE_USE - -namespace embree -{ - /*! decoding of intersection flags */ - __forceinline bool isCoherent (RTCIntersectContextFlags flags) { return (flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT) == RTC_INTERSECT_CONTEXT_FLAG_COHERENT; } - __forceinline bool isIncoherent(RTCIntersectContextFlags flags) { return (flags & RTC_INTERSECT_CONTEXT_FLAG_COHERENT) == RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT; } - -#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8) -# define USE_TASK_ARENA 1 -#else -# define USE_TASK_ARENA 0 -#endif - -#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9 -# define TASKING_TBB_USE_TASK_ISOLATION 1 -#else -# define TASKING_TBB_USE_TASK_ISOLATION 0 -#endif - -/*! Macros used in the rtcore API implementation */ -// -- GODOT start -- -// #define RTC_CATCH_BEGIN try { -#define RTC_CATCH_BEGIN - -// #define RTC_CATCH_END(device) \ -// } catch (std::bad_alloc&) { \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// } catch (rtcore_error& e) { \ -// Device::process_error(device,e.error,e.what()); \ -// } catch (std::exception& e) { \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// } catch (...) { \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// } -#define RTC_CATCH_END(device) - -// #define RTC_CATCH_END2(scene) \ -// } catch (std::bad_alloc&) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// } catch (rtcore_error& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,e.error,e.what()); \ -// } catch (std::exception& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// } catch (...) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// } -#define RTC_CATCH_END2(scene) - -// #define RTC_CATCH_END2_FALSE(scene) \ -// } catch (std::bad_alloc&) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// return false; \ -// } catch (rtcore_error& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,e.error,e.what()); \ -// return false; \ -// } catch (std::exception& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// return false; \ -// } catch (...) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// return false; \ -// } -#define RTC_CATCH_END2_FALSE(scene) return false; -// -- GODOT end -- - -#define RTC_VERIFY_HANDLE(handle) \ - if (handle == nullptr) { \ - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \ - } - -#define RTC_VERIFY_GEOMID(id) \ - if (id == RTC_INVALID_GEOMETRY_ID) { \ - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \ - } - -#define RTC_VERIFY_UPPER(id,upper) \ - if (id > upper) { \ - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \ - } - -#define RTC_VERIFY_RANGE(id,lower,upper) \ - if (id < lower || id > upper) \ - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds"); - -#if 0 // enable to debug print all API calls -#define RTC_TRACE(x) std::cout << #x << std::endl; -#else -#define RTC_TRACE(x) -#endif - -// -- GODOT begin -- -// /*! used to throw embree API errors */ -// struct rtcore_error : public std::exception -// { -// __forceinline rtcore_error(RTCError error, const std::string& str) -// : error(error), str(str) {} -// -// ~rtcore_error() throw() {} -// -// const char* what () const throw () { -// return str.c_str(); -// } -// -// RTCError error; -// std::string str; -// }; -// -- GODOT end -- - -#if defined(DEBUG) // only report file and line in debug mode - // -- GODOT begin -- - // #define throw_RTCError(error,str) \ - // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); - #define throw_RTCError(error,str) \ - printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); - // -- GODOT end -- -#else - // -- GODOT begin -- - // #define throw_RTCError(error,str) \ - // throw rtcore_error(error,str); - #define throw_RTCError(error,str) \ - abort(); - // -- GODOT end -- -#endif - -#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \ - (settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member))) -} diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp deleted file mode 100644 index 6bb96bba07..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp +++ /dev/null @@ -1,442 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#define RTC_EXPORT_API - -#include "default.h" -#include "device.h" -#include "scene.h" -#include "context.h" -#include "alloc.h" - -#include "../builders/bvh_builder_sah.h" -#include "../builders/bvh_builder_morton.h" - -namespace embree -{ - namespace isa // FIXME: support more ISAs for builders - { - struct BVH : public RefCount - { - BVH (Device* device) - : device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0) - { - device->refInc(); - } - - ~BVH() { - device->refDec(); - } - - public: - Device* device; - FastAllocator allocator; - mvector<BVHBuilderMorton::BuildPrim> morton_src; - mvector<BVHBuilderMorton::BuildPrim> morton_tmp; - }; - - void* rtcBuildBVHMorton(const RTCBuildArguments* arguments) - { - BVH* bvh = (BVH*) arguments->bvh; - RTCBuildPrimitive* prims_i = arguments->primitives; - size_t primitiveCount = arguments->primitiveCount; - RTCCreateNodeFunction createNode = arguments->createNode; - RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; - RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; - RTCCreateLeafFunction createLeaf = arguments->createLeaf; - RTCProgressMonitorFunction buildProgress = arguments->buildProgress; - void* userPtr = arguments->userPtr; - - std::atomic<size_t> progress(0); - - /* initialize temporary arrays for morton builder */ - PrimRef* prims = (PrimRef*) prims_i; - mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src; - mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp; - morton_src.resize(primitiveCount); - morton_tmp.resize(primitiveCount); - - /* compute centroid bounds */ - const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa { - - BBox3fa bounds(empty); - for (size_t i=r.begin(); i<r.end(); i++) - bounds.extend(prims[i].bounds().center2()); - return bounds; - }, BBox3fa::merge); - - /* compute morton codes */ - BVHBuilderMorton::MortonCodeMapping mapping(centBounds); - parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) { - BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]); - for (size_t i=r.begin(); i<r.end(); i++) { - generator(prims[i].bounds(),(unsigned) i); - } - }); - - /* start morton build */ - std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>( - - /* thread local allocator for fast allocations */ - [&] () -> FastAllocator::CachedAllocator { - return bvh->allocator.getCachedAllocator(); - }, - - /* lambda function that allocates BVH nodes */ - [&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* { - return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); - }, - - /* lambda function that sets bounds */ - [&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa> - { - BBox3fa bounds = empty; - void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR]; - const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR]; - for (size_t i=0; i<N; i++) { - bounds.extend(children[i].second); - childptrs[i] = children[i].first; - cbounds[i] = (const RTCBounds*)&children[i].second; - } - setNodeBounds(node,cbounds,(unsigned int)N,userPtr); - setNodeChildren(node,childptrs, (unsigned int)N,userPtr); - return std::make_pair(node,bounds); - }, - - /* lambda function that creates BVH leaves */ - [&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa> - { - RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF]; - BBox3fa bounds = empty; - for (size_t i=0;i<current.size();i++) - { - const size_t id = morton_src[current.begin()+i].index; - bounds.extend(prims[id].bounds()); - localBuildPrims[i] = prims_i[id]; - } - void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr); - return std::make_pair(node,bounds); - }, - - /* lambda that calculates the bounds for some primitive */ - [&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa { - return prims[morton.index].bounds(); - }, - - /* progress monitor function */ - [&] (size_t dn) { - if (!buildProgress) return true; - const size_t n = progress.fetch_add(dn)+dn; - const double f = std::min(1.0,double(n)/double(primitiveCount)); - return buildProgress(userPtr,f); - }, - - morton_src.data(),morton_tmp.data(),primitiveCount, - *arguments); - - bvh->allocator.cleanup(); - return root.first; - } - - void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments) - { - BVH* bvh = (BVH*) arguments->bvh; - RTCBuildPrimitive* prims = arguments->primitives; - size_t primitiveCount = arguments->primitiveCount; - RTCCreateNodeFunction createNode = arguments->createNode; - RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; - RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; - RTCCreateLeafFunction createLeaf = arguments->createLeaf; - RTCProgressMonitorFunction buildProgress = arguments->buildProgress; - void* userPtr = arguments->userPtr; - - std::atomic<size_t> progress(0); - - /* calculate priminfo */ - auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa - { - CentGeomBBox3fa bounds(empty); - for (size_t j=r.begin(); j<r.end(); j++) - bounds.extend((BBox3fa&)prims[j]); - return bounds; - }; - const CentGeomBBox3fa bounds = - parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2); - - const PrimInfo pinfo(0,primitiveCount,bounds); - - /* build BVH */ - void* root = BVHBuilderBinnedSAH::build<void*>( - - /* thread local allocator for fast allocations */ - [&] () -> FastAllocator::CachedAllocator { - return bvh->allocator.getCachedAllocator(); - }, - - /* lambda function that creates BVH nodes */ - [&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void* - { - void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); - const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR]; - for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds; - setNodeBounds(node,cbounds, (unsigned int)N,userPtr); - return node; - }, - - /* lambda function that updates BVH nodes */ - [&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* { - setNodeChildren(node,children, (unsigned int)N,userPtr); - return node; - }, - - /* lambda function that creates BVH leaves */ - [&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* { - return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr); - }, - - /* progress monitor function */ - [&] (size_t dn) { - if (!buildProgress) return true; - const size_t n = progress.fetch_add(dn)+dn; - const double f = std::min(1.0,double(n)/double(primitiveCount)); - return buildProgress(userPtr,f); - }, - - (PrimRef*)prims,pinfo,*arguments); - - bvh->allocator.cleanup(); - return root; - } - - static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) { - CentGeomBBox3fa centBounds = CentGeomBBox3fa::merge2(a.first,b.first); - unsigned int maxGeomID = max(a.second,b.second); - return std::pair<CentGeomBBox3fa,unsigned int>(centBounds,maxGeomID); - } - - void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments) - { - BVH* bvh = (BVH*) arguments->bvh; - RTCBuildPrimitive* prims = arguments->primitives; - size_t primitiveCount = arguments->primitiveCount; - RTCCreateNodeFunction createNode = arguments->createNode; - RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; - RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; - RTCCreateLeafFunction createLeaf = arguments->createLeaf; - RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive; - RTCProgressMonitorFunction buildProgress = arguments->buildProgress; - void* userPtr = arguments->userPtr; - - std::atomic<size_t> progress(0); - - /* calculate priminfo */ - - auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int> - { - CentGeomBBox3fa bounds(empty); - unsigned maxGeomID = 0; - for (size_t j=r.begin(); j<r.end(); j++) - { - bounds.extend((BBox3fa&)prims[j]); - maxGeomID = max(maxGeomID,prims[j].geomID); - } - return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID); - }; - - - const std::pair<CentGeomBBox3fa,unsigned int> pair = - parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair); - - CentGeomBBox3fa bounds = pair.first; - const unsigned int maxGeomID = pair.second; - - if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)))) - { - /* fallback code for max geomID larger than threshold */ - return rtcBuildBVHBinnedSAH(arguments); - } - - const PrimInfo pinfo(0,primitiveCount,bounds); - - /* function that splits a build primitive */ - struct Splitter - { - Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr) - : splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {} - - __forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const - { - prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK; - splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr); - left_o.geomIDref() = geomID; left_o.primIDref() = primID; - right_o.geomIDref() = geomID; right_o.primIDref() = primID; - } - - __forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const - { - PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID); - splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr); - } - - RTCSplitPrimitiveFunction splitPrimitive; - unsigned geomID; - unsigned primID; - void* userPtr; - }; - - /* build BVH */ - void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>( - - /* thread local allocator for fast allocations */ - [&] () -> FastAllocator::CachedAllocator { - return bvh->allocator.getCachedAllocator(); - }, - - /* lambda function that creates BVH nodes */ - [&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void* - { - void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); - const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR]; - for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds; - setNodeBounds(node,cbounds, (unsigned int)N,userPtr); - return node; - }, - - /* lambda function that updates BVH nodes */ - [&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* { - setNodeChildren(node,children, (unsigned int)N,userPtr); - return node; - }, - - /* lambda function that creates BVH leaves */ - [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* { - return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr); - }, - - /* returns the splitter */ - [&] ( const PrimRef& prim ) -> Splitter { - return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr); - }, - - /* progress monitor function */ - [&] (size_t dn) { - if (!buildProgress) return true; - const size_t n = progress.fetch_add(dn)+dn; - const double f = std::min(1.0,double(n)/double(primitiveCount)); - return buildProgress(userPtr,f); - }, - - (PrimRef*)prims, - arguments->primitiveArrayCapacity, - pinfo,*arguments); - - bvh->allocator.cleanup(); - return root; - } - } -} - -using namespace embree; -using namespace embree::isa; - -RTC_NAMESPACE_BEGIN - - RTC_API RTCBVH rtcNewBVH(RTCDevice device) - { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcNewAllocator); - RTC_VERIFY_HANDLE(device); - BVH* bvh = new BVH((Device*)device); - return (RTCBVH) bvh->refInc(); - RTC_CATCH_END((Device*)device); - return nullptr; - } - - RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments) - { - BVH* bvh = (BVH*) arguments->bvh; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcBuildBVH); - RTC_VERIFY_HANDLE(bvh); - RTC_VERIFY_HANDLE(arguments); - RTC_VERIFY_HANDLE(arguments->createNode); - RTC_VERIFY_HANDLE(arguments->setNodeChildren); - RTC_VERIFY_HANDLE(arguments->setNodeBounds); - RTC_VERIFY_HANDLE(arguments->createLeaf); - - if (arguments->primitiveArrayCapacity < arguments->primitiveCount) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount") - - /* initialize the allocator */ - bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa)); - bvh->allocator.reset(); - - /* switch between differnet builders based on quality level */ - if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW) - return rtcBuildBVHMorton(arguments); - else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM) - return rtcBuildBVHBinnedSAH(arguments); - else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) { - if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount) - return rtcBuildBVHBinnedSAH(arguments); - else - return rtcBuildBVHSpatialSAH(arguments); - } - else - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality"); - - /* if we are in dynamic mode, then do not clear temporary data */ - if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC)) - { - bvh->morton_src.clear(); - bvh->morton_tmp.clear(); - } - - RTC_CATCH_END(bvh->device); - return nullptr; - } - - RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align) - { - FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcThreadLocalAlloc); - return alloc->malloc0(bytes,align); - RTC_CATCH_END(alloc->alloc->getDevice()); - return nullptr; - } - - RTC_API void rtcMakeStaticBVH(RTCBVH hbvh) - { - BVH* bvh = (BVH*) hbvh; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcStaticBVH); - RTC_VERIFY_HANDLE(hbvh); - bvh->morton_src.clear(); - bvh->morton_tmp.clear(); - RTC_CATCH_END(bvh->device); - } - - RTC_API void rtcRetainBVH(RTCBVH hbvh) - { - BVH* bvh = (BVH*) hbvh; - Device* device = bvh ? bvh->device : nullptr; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcRetainBVH); - RTC_VERIFY_HANDLE(hbvh); - bvh->refInc(); - RTC_CATCH_END(device); - } - - RTC_API void rtcReleaseBVH(RTCBVH hbvh) - { - BVH* bvh = (BVH*) hbvh; - Device* device = bvh ? bvh->device : nullptr; - RTC_CATCH_BEGIN; - RTC_TRACE(rtcReleaseBVH); - RTC_VERIFY_HANDLE(hbvh); - bvh->refDec(); - RTC_CATCH_END(device); - } - -RTC_NAMESPACE_END diff --git a/thirdparty/embree-aarch64/kernels/common/scene.cpp b/thirdparty/embree-aarch64/kernels/common/scene.cpp deleted file mode 100644 index 1e23aeb415..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene.cpp +++ /dev/null @@ -1,976 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "scene.h" - -#include "../bvh/bvh4_factory.h" -#include "../bvh/bvh8_factory.h" -#include "../../common/algorithms/parallel_reduce.h" - -namespace embree -{ - /* error raising rtcIntersect and rtcOccluded functions */ - void missing_rtcCommit() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"scene not committed"); } - void invalid_rtcIntersect1() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect and rtcOccluded not enabled"); } - void invalid_rtcIntersect4() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect4 and rtcOccluded4 not enabled"); } - void invalid_rtcIntersect8() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect8 and rtcOccluded8 not enabled"); } - void invalid_rtcIntersect16() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersect16 and rtcOccluded16 not enabled"); } - void invalid_rtcIntersectN() { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcIntersectN and rtcOccludedN not enabled"); } - - Scene::Scene (Device* device) - : device(device), - flags_modified(true), enabled_geometry_types(0), - scene_flags(RTC_SCENE_FLAG_NONE), - quality_flags(RTC_BUILD_QUALITY_MEDIUM), - is_build(false), modified(true), - progressInterface(this), progress_monitor_function(nullptr), progress_monitor_ptr(nullptr), progress_monitor_counter(0) - { - device->refInc(); - - intersectors = Accel::Intersectors(missing_rtcCommit); - - /* one can overwrite flags through device for debugging */ - if (device->quality_flags != -1) - quality_flags = (RTCBuildQuality) device->quality_flags; - if (device->scene_flags != -1) - scene_flags = (RTCSceneFlags) device->scene_flags; - } - - Scene::~Scene() noexcept - { - device->refDec(); - } - - void Scene::printStatistics() - { - /* calculate maximum number of time segments */ - unsigned max_time_steps = 0; - for (size_t i=0; i<size(); i++) { - if (!get(i)) continue; - max_time_steps = max(max_time_steps,get(i)->numTimeSteps); - } - - /* initialize vectors*/ - std::vector<size_t> statistics[Geometry::GTY_END]; - for (size_t i=0; i<Geometry::GTY_END; i++) - statistics[i].resize(max_time_steps); - - /* gather statistics */ - for (size_t i=0; i<size(); i++) - { - if (!get(i)) continue; - int ty = get(i)->getType(); - assert(ty<Geometry::GTY_END); - int timesegments = get(i)->numTimeSegments(); - assert((unsigned int)timesegments < max_time_steps); - statistics[ty][timesegments] += get(i)->size(); - } - - /* print statistics */ - std::cout << std::setw(23) << "segments" << ": "; - for (size_t t=0; t<max_time_steps; t++) - std::cout << std::setw(10) << t; - std::cout << std::endl; - - std::cout << "-------------------------"; - for (size_t t=0; t<max_time_steps; t++) - std::cout << "----------"; - std::cout << std::endl; - - for (size_t p=0; p<Geometry::GTY_END; p++) - { - if (std::string(Geometry::gtype_names[p]) == "") continue; - std::cout << std::setw(23) << Geometry::gtype_names[p] << ": "; - for (size_t t=0; t<max_time_steps; t++) - std::cout << std::setw(10) << statistics[p][t]; - std::cout << std::endl; - } - } - - void Scene::createTriangleAccel() - { -#if defined(EMBREE_GEOMETRY_TRIANGLE) - if (device->tri_accel == "default") - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) - accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); - else - accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - } - else -#endif - { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) - accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); - else - accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - } - break; - - case /*0b01*/ 1: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - else -#endif - accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - - break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else /* dynamic */ - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else -#endif - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4 (this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4i(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - } - } - else if (device->tri_accel == "bvh4.triangle4") accels_add(device->bvh4_factory->BVH4Triangle4 (this)); - else if (device->tri_accel == "bvh4.triangle4v") accels_add(device->bvh4_factory->BVH4Triangle4v(this)); - else if (device->tri_accel == "bvh4.triangle4i") accels_add(device->bvh4_factory->BVH4Triangle4i(this)); - else if (device->tri_accel == "qbvh4.triangle4i") accels_add(device->bvh4_factory->BVH4QuantizedTriangle4i(this)); - -#if defined (EMBREE_TARGET_SIMD8) - else if (device->tri_accel == "bvh8.triangle4") accels_add(device->bvh8_factory->BVH8Triangle4 (this)); - else if (device->tri_accel == "bvh8.triangle4v") accels_add(device->bvh8_factory->BVH8Triangle4v(this)); - else if (device->tri_accel == "bvh8.triangle4i") accels_add(device->bvh8_factory->BVH8Triangle4i(this)); - else if (device->tri_accel == "qbvh8.triangle4i") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4i(this)); - else if (device->tri_accel == "qbvh8.triangle4") accels_add(device->bvh8_factory->BVH8QuantizedTriangle4(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown triangle acceleration structure "+device->tri_accel); -#endif - } - - void Scene::createTriangleMBAccel() - { -#if defined(EMBREE_GEOMETRY_TRIANGLE) - if (device->tri_accel_mb == "default") - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX2()) // BVH8 reduces performance on AVX only-machines - { - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else -#endif - { - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Triangle4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - } - else if (device->tri_accel_mb == "bvh4.triangle4imb") accels_add(device->bvh4_factory->BVH4Triangle4iMB(this)); - else if (device->tri_accel_mb == "bvh4.triangle4vmb") accels_add(device->bvh4_factory->BVH4Triangle4vMB(this)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->tri_accel_mb == "bvh8.triangle4imb") accels_add(device->bvh8_factory->BVH8Triangle4iMB(this)); - else if (device->tri_accel_mb == "bvh8.triangle4vmb") accels_add(device->bvh8_factory->BVH8Triangle4vMB(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur triangle acceleration structure "+device->tri_accel_mb); -#endif - } - - void Scene::createQuadAccel() - { -#if defined(EMBREE_GEOMETRY_QUAD) - if (device->quad_accel == "default") - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) - { - /* static */ - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) - accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); - else - accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - } - else -#endif - { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) - accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); - else - accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - } - break; - - case /*0b01*/ 1: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - else -#endif - accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - break; - - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4i(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else /* dynamic */ - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break; - case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else -#endif - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break; - case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::FAST)); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::DYNAMIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - } - } - else if (device->quad_accel == "bvh4.quad4v") accels_add(device->bvh4_factory->BVH4Quad4v(this)); - else if (device->quad_accel == "bvh4.quad4i") accels_add(device->bvh4_factory->BVH4Quad4i(this)); - else if (device->quad_accel == "qbvh4.quad4i") accels_add(device->bvh4_factory->BVH4QuantizedQuad4i(this)); - -#if defined (EMBREE_TARGET_SIMD8) - else if (device->quad_accel == "bvh8.quad4v") accels_add(device->bvh8_factory->BVH8Quad4v(this)); - else if (device->quad_accel == "bvh8.quad4i") accels_add(device->bvh8_factory->BVH8Quad4i(this)); - else if (device->quad_accel == "qbvh8.quad4i") accels_add(device->bvh8_factory->BVH8QuantizedQuad4i(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad acceleration structure "+device->quad_accel); -#endif - } - - void Scene::createQuadMBAccel() - { -#if defined(EMBREE_GEOMETRY_QUAD) - if (device->quad_accel_mb == "default") - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - switch (mode) { - case /*0b00*/ 0: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - else -#endif - accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); - break; - - case /*0b01*/ 1: -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) - accels_add(device->bvh8_factory->BVH8Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - else -#endif - accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); - break; - - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST )); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4Quad4iMB(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else if (device->quad_accel_mb == "bvh4.quad4imb") accels_add(device->bvh4_factory->BVH4Quad4iMB(this)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->quad_accel_mb == "bvh8.quad4imb") accels_add(device->bvh8_factory->BVH8Quad4iMB(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown quad motion blur acceleration structure "+device->quad_accel_mb); -#endif - } - - void Scene::createHairAccel() - { -#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT) - if (device->hair_accel == "default") - { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX2()) // only enable on HSW machines, for SNB this codepath is slower - { - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST)); break; - case /*0b01*/ 1: accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST)); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - else -#endif - { - switch (mode) { - case /*0b00*/ 0: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST)); break; - case /*0b01*/ 1: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::ROBUST)); break; - case /*0b10*/ 2: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST)); break; - case /*0b11*/ 3: accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::ROBUST)); break; - } - } - } - else if (device->hair_accel == "bvh4obb.virtualcurve4v" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4v(this,BVHFactory::IntersectVariant::FAST)); - else if (device->hair_accel == "bvh4obb.virtualcurve4i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4i(this,BVHFactory::IntersectVariant::FAST)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->hair_accel == "bvh8obb.virtualcurve8v" ) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8v(this,BVHFactory::IntersectVariant::FAST)); - else if (device->hair_accel == "bvh4obb.virtualcurve8i" ) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8i(this,BVHFactory::IntersectVariant::FAST)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown hair acceleration structure "+device->hair_accel); -#endif - } - - void Scene::createHairMBAccel() - { -#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT) - if (device->hair_accel_mb == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX2()) // only enable on HSW machines, on SNB this codepath is slower - { - if (isRobustAccel()) accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::ROBUST)); - else accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST)); - } - else -#endif - { - if (isRobustAccel()) accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::ROBUST)); - else accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST)); - } - } - else if (device->hair_accel_mb == "bvh4.virtualcurve4imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve4iMB(this,BVHFactory::IntersectVariant::FAST)); - -#if defined (EMBREE_TARGET_SIMD8) - else if (device->hair_accel_mb == "bvh4.virtualcurve8imb") accels_add(device->bvh4_factory->BVH4OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST)); - else if (device->hair_accel_mb == "bvh8.virtualcurve8imb") accels_add(device->bvh8_factory->BVH8OBBVirtualCurve8iMB(this,BVHFactory::IntersectVariant::FAST)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown motion blur hair acceleration structure "+device->hair_accel_mb); -#endif - } - - void Scene::createSubdivAccel() - { -#if defined(EMBREE_GEOMETRY_SUBDIVISION) - if (device->subdiv_accel == "default") { - accels_add(device->bvh4_factory->BVH4SubdivPatch1(this)); - } - else if (device->subdiv_accel == "bvh4.grid.eager" ) accels_add(device->bvh4_factory->BVH4SubdivPatch1(this)); - else if (device->subdiv_accel == "bvh4.subdivpatch1eager" ) accels_add(device->bvh4_factory->BVH4SubdivPatch1(this)); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv accel "+device->subdiv_accel); -#endif - } - - void Scene::createSubdivMBAccel() - { -#if defined(EMBREE_GEOMETRY_SUBDIVISION) - if (device->subdiv_accel_mb == "default") { - accels_add(device->bvh4_factory->BVH4SubdivPatch1MB(this)); - } - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown subdiv mblur accel "+device->subdiv_accel_mb); -#endif - } - - void Scene::createUserGeometryAccel() - { -#if defined(EMBREE_GEOMETRY_USER) - if (device->object_accel == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh8_factory->BVH8UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC)); - } - } - else -#endif - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh4_factory->BVH4UserGeometry(this,BVHFactory::BuildVariant::DYNAMIC)); - } - } - } - else if (device->object_accel == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometry(this)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->object_accel == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometry(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry accel "+device->object_accel); -#endif - } - - void Scene::createUserGeometryMBAccel() - { -#if defined(EMBREE_GEOMETRY_USER) - if (device->object_accel_mb == "default" ) { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) - accels_add(device->bvh8_factory->BVH8UserGeometryMB(this)); - else -#endif - accels_add(device->bvh4_factory->BVH4UserGeometryMB(this)); - } - else if (device->object_accel_mb == "bvh4.object") accels_add(device->bvh4_factory->BVH4UserGeometryMB(this)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->object_accel_mb == "bvh8.object") accels_add(device->bvh8_factory->BVH8UserGeometryMB(this)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown user geometry mblur accel "+device->object_accel_mb); -#endif - } - - void Scene::createInstanceAccel() - { -#if defined(EMBREE_GEOMETRY_INSTANCE) - // if (device->object_accel == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh8_factory->BVH8Instance(this, false, BVHFactory::BuildVariant::DYNAMIC)); - } - } - else -#endif - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh4_factory->BVH4Instance(this, false, BVHFactory::BuildVariant::DYNAMIC)); - } - } - } - // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel); -#endif - } - - void Scene::createInstanceMBAccel() - { -#if defined(EMBREE_GEOMETRY_INSTANCE) - //if (device->instance_accel_mb == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) - accels_add(device->bvh8_factory->BVH8InstanceMB(this, false)); - else -#endif - accels_add(device->bvh4_factory->BVH4InstanceMB(this, false)); - } - //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb); -#endif - } - - void Scene::createInstanceExpensiveAccel() - { -#if defined(EMBREE_GEOMETRY_INSTANCE) - // if (device->object_accel == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh8_factory->BVH8Instance(this, true, BVHFactory::BuildVariant::DYNAMIC)); - } - } - else -#endif - { - if (quality_flags != RTC_BUILD_QUALITY_LOW) { - accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::STATIC)); - } else { - accels_add(device->bvh4_factory->BVH4Instance(this, true, BVHFactory::BuildVariant::DYNAMIC)); - } - } - } - // else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance accel "+device->instance_accel); -#endif - } - - void Scene::createInstanceExpensiveMBAccel() - { -#if defined(EMBREE_GEOMETRY_INSTANCE) - //if (device->instance_accel_mb == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) - accels_add(device->bvh8_factory->BVH8InstanceMB(this, true)); - else -#endif - accels_add(device->bvh4_factory->BVH4InstanceMB(this, true)); - } - //else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown instance mblur accel "+device->instance_accel_mb); -#endif - } - - void Scene::createGridAccel() - { - BVHFactory::IntersectVariant ivariant = isRobustAccel() ? BVHFactory::IntersectVariant::ROBUST : BVHFactory::IntersectVariant::FAST; -#if defined(EMBREE_GEOMETRY_GRID) - if (device->grid_accel == "default") - { -#if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX() && !isCompactAccel()) - { - accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant)); - } - else -#endif - { - accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant)); - } - } - else if (device->grid_accel == "bvh4.grid") accels_add(device->bvh4_factory->BVH4Grid(this,BVHFactory::BuildVariant::STATIC,ivariant)); -#if defined (EMBREE_TARGET_SIMD8) - else if (device->grid_accel == "bvh8.grid") accels_add(device->bvh8_factory->BVH8Grid(this,BVHFactory::BuildVariant::STATIC,ivariant)); -#endif - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid accel "+device->grid_accel); -#endif - - } - - void Scene::createGridMBAccel() - { -#if defined(EMBREE_GEOMETRY_GRID) - if (device->grid_accel_mb == "default") - { - accels_add(device->bvh4_factory->BVH4GridMB(this,BVHFactory::BuildVariant::STATIC)); - } - else if (device->grid_accel_mb == "bvh4mb.grid") accels_add(device->bvh4_factory->BVH4GridMB(this)); - else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown grid mb accel "+device->grid_accel); -#endif - - } - - void Scene::clear() { - } - - unsigned Scene::bind(unsigned geomID, Ref<Geometry> geometry) - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(geometriesMutex); -#else - Lock<SpinLock> lock(geometriesMutex); -#endif - if (geomID == RTC_INVALID_GEOMETRY_ID) { - geomID = id_pool.allocate(); - if (geomID == RTC_INVALID_GEOMETRY_ID) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"too many geometries inside scene"); - } - else - { - if (!id_pool.add(geomID)) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID provided"); - } - if (geomID >= geometries.size()) { - geometries.resize(geomID+1); - vertices.resize(geomID+1); - geometryModCounters_.resize(geomID+1); - } - geometries[geomID] = geometry; - geometryModCounters_[geomID] = 0; - if (geometry->isEnabled()) { - setModified (); - } - return geomID; - } - - void Scene::detachGeometry(size_t geomID) - { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(geometriesMutex); -#else - Lock<SpinLock> lock(geometriesMutex); -#endif - - if (geomID >= geometries.size()) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID"); - - Ref<Geometry>& geometry = geometries[geomID]; - if (geometry == null) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry"); - - if (geometry->isEnabled()) { - setModified (); - } - accels_deleteGeometry(unsigned(geomID)); - id_pool.deallocate((unsigned)geomID); - geometries[geomID] = null; - vertices[geomID] = nullptr; - geometryModCounters_[geomID] = 0; - } - - void Scene::updateInterface() - { - is_build = true; - } - - void Scene::commit_task () - { - checkIfModifiedAndSet (); - if (!isModified()) { - return; - } - - /* print scene statistics */ - if (device->verbosity(2)) - printStatistics(); - - progress_monitor_counter = 0; - - /* gather scene stats and call preCommit function of each geometry */ - this->world = parallel_reduce (size_t(0), geometries.size(), GeometryCounts (), - [this](const range<size_t>& r)->GeometryCounts - { - GeometryCounts c; - for (auto i=r.begin(); i<r.end(); ++i) - { - if (geometries[i] && geometries[i]->isEnabled()) - { - geometries[i]->preCommit(); - geometries[i]->addElementsToCount (c); - c.numFilterFunctions += (int) geometries[i]->hasFilterFunctions(); - } - } - return c; - }, - std::plus<GeometryCounts>() - ); - - /* select acceleration structures to build */ - unsigned int new_enabled_geometry_types = world.enabledGeometryTypesMask(); - if (flags_modified || new_enabled_geometry_types != enabled_geometry_types) - { - accels_init(); - - /* we need to make all geometries modified, otherwise two level builder will - not rebuild currently not modified geometries */ - parallel_for(geometryModCounters_.size(), [&] ( const size_t i ) { - geometryModCounters_[i] = 0; - }); - - if (getNumPrimitives(TriangleMesh::geom_type,false)) createTriangleAccel(); - if (getNumPrimitives(TriangleMesh::geom_type,true)) createTriangleMBAccel(); - if (getNumPrimitives(QuadMesh::geom_type,false)) createQuadAccel(); - if (getNumPrimitives(QuadMesh::geom_type,true)) createQuadMBAccel(); - if (getNumPrimitives(GridMesh::geom_type,false)) createGridAccel(); - if (getNumPrimitives(GridMesh::geom_type,true)) createGridMBAccel(); - if (getNumPrimitives(SubdivMesh::geom_type,false)) createSubdivAccel(); - if (getNumPrimitives(SubdivMesh::geom_type,true)) createSubdivMBAccel(); - if (getNumPrimitives(Geometry::MTY_CURVES,false)) createHairAccel(); - if (getNumPrimitives(Geometry::MTY_CURVES,true)) createHairMBAccel(); - if (getNumPrimitives(UserGeometry::geom_type,false)) createUserGeometryAccel(); - if (getNumPrimitives(UserGeometry::geom_type,true)) createUserGeometryMBAccel(); - if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,false)) createInstanceAccel(); - if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,true)) createInstanceMBAccel(); - if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,false)) createInstanceExpensiveAccel(); - if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,true)) createInstanceExpensiveMBAccel(); - - flags_modified = false; - enabled_geometry_types = new_enabled_geometry_types; - } - - /* select fast code path if no filter function is present */ - accels_select(hasFilterFunction()); - - /* build all hierarchies of this scene */ - accels_build(); - - /* make static geometry immutable */ - if (!isDynamicAccel()) { - accels_immutable(); - flags_modified = true; // in non-dynamic mode we have to re-create accels - } - - /* call postCommit function of each geometry */ - parallel_for(geometries.size(), [&] ( const size_t i ) { - if (geometries[i] && geometries[i]->isEnabled()) { - geometries[i]->postCommit(); - vertices[i] = geometries[i]->getCompactVertexArray(); - geometryModCounters_[i] = geometries[i]->getModCounter(); - } - }); - - updateInterface(); - - if (device->verbosity(2)) { - std::cout << "created scene intersector" << std::endl; - accels_print(2); - std::cout << "selected scene intersector" << std::endl; - intersectors.print(2); - } - - setModified(false); - } - - void Scene::setBuildQuality(RTCBuildQuality quality_flags_i) - { - if (quality_flags == quality_flags_i) return; - quality_flags = quality_flags_i; - flags_modified = true; - } - - RTCBuildQuality Scene::getBuildQuality() const { - return quality_flags; - } - - void Scene::setSceneFlags(RTCSceneFlags scene_flags_i) - { - if (scene_flags == scene_flags_i) return; - scene_flags = scene_flags_i; - flags_modified = true; - } - - RTCSceneFlags Scene::getSceneFlags() const { - return scene_flags; - } - -#if defined(TASKING_INTERNAL) - - void Scene::commit (bool join) - { - Lock<MutexSys> buildLock(buildMutex,false); - - /* allocates own taskscheduler for each build */ - Ref<TaskScheduler> scheduler = nullptr; - { - Lock<MutexSys> lock(schedulerMutex); - scheduler = this->scheduler; - if (scheduler == null) { - buildLock.lock(); - this->scheduler = scheduler = new TaskScheduler; - } - } - - /* worker threads join build */ - if (!buildLock.isLocked()) - { - if (!join) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"use rtcJoinCommitScene to join a build operation"); - - scheduler->join(); - return; - } - - /* initiate build */ - // -- GODOT start -- - // try { - scheduler->spawn_root([&]() { commit_task(); Lock<MutexSys> lock(schedulerMutex); this->scheduler = nullptr; }, 1, !join); - // } - // catch (...) { - // accels_clear(); - // updateInterface(); - // Lock<MutexSys> lock(schedulerMutex); - // this->scheduler = nullptr; - // throw; - // } - // -- GODOT end -- - } - -#endif - -#if defined(TASKING_TBB) || defined(TASKING_GCD) - - void Scene::commit (bool join) - { -#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8) - if (join) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with this TBB version"); -#endif - - /* try to obtain build lock */ - Lock<MutexSys> lock(buildMutex,buildMutex.try_lock()); - - /* join hierarchy build */ - if (!lock.isLocked()) - { -#if !TASKING_TBB_USE_TASK_ISOLATION - if (!join) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invoking rtcCommitScene from multiple threads is not supported with this TBB version"); -#endif - - do { - -#if defined(TASKING_GCD) - // Do Nothing -#else -#if USE_TASK_ARENA - if (join) { - device->arena->execute([&]{ group.wait(); }); - } - else -#endif - { - group.wait(); - } -#endif - - pause_cpu(); - yield(); - - } while (!buildMutex.try_lock()); - - buildMutex.unlock(); - return; - } - - /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */ - const unsigned int mxcsr = _mm_getcsr(); - _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6)); - - try { -#if defined(TASKING_TBB) -#if TBB_INTERFACE_VERSION_MAJOR < 8 - tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits); -#else - tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits | tbb::task_group_context::fp_settings ); -#endif - //ctx.set_priority(tbb::priority_high); - -#if USE_TASK_ARENA - if (join) - { - device->arena->execute([&]{ - group.run([&]{ - tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx); - }); - group.wait(); - }); - } - else -#endif - { - group.run([&]{ - tbb::parallel_for (size_t(0), size_t(1), size_t(1), [&] (size_t) { commit_task(); }, ctx); - }); - group.wait(); - } - - /* reset MXCSR register again */ - _mm_setcsr(mxcsr); - -#elif defined(TASKING_GCD) - - commit_task(); - -#endif // #if defined(TASKING_TBB) - - } - catch (...) - { - /* reset MXCSR register again */ - _mm_setcsr(mxcsr); - - accels_clear(); - updateInterface(); - throw; - } - } -#endif - -#if defined(TASKING_PPL) - - void Scene::commit (bool join) - { -#if defined(TASKING_PPL) - if (join) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with PPL"); -#endif - - /* try to obtain build lock */ - Lock<MutexSys> lock(buildMutex); - - checkIfModifiedAndSet (); - if (!isModified()) { - return; - } - - /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */ - const unsigned int mxcsr = _mm_getcsr(); - _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6)); - - try { - - group.run([&]{ - concurrency::parallel_for(size_t(0), size_t(1), size_t(1), [&](size_t) { commit_task(); }); - }); - group.wait(); - - /* reset MXCSR register again */ - _mm_setcsr(mxcsr); - } - catch (...) - { - /* reset MXCSR register again */ - _mm_setcsr(mxcsr); - - accels_clear(); - updateInterface(); - throw; - } - } -#endif - - void Scene::setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr) - { - progress_monitor_function = func; - progress_monitor_ptr = ptr; - } - - void Scene::progressMonitor(double dn) - { - if (progress_monitor_function) { - size_t n = size_t(dn) + progress_monitor_counter.fetch_add(size_t(dn)); - if (!progress_monitor_function(progress_monitor_ptr, n / (double(numPrimitives())))) { - throw_RTCError(RTC_ERROR_CANCELLED,"progress monitor forced termination"); - } - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene.h b/thirdparty/embree-aarch64/kernels/common/scene.h deleted file mode 100644 index b41c6cde91..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene.h +++ /dev/null @@ -1,390 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "device.h" -#include "builder.h" -#include "../../common/algorithms/parallel_any_of.h" -#include "scene_triangle_mesh.h" -#include "scene_quad_mesh.h" -#include "scene_user_geometry.h" -#include "scene_instance.h" -#include "scene_curves.h" -#include "scene_line_segments.h" -#include "scene_subdiv_mesh.h" -#include "scene_grid_mesh.h" -#include "scene_points.h" -#include "../subdiv/tessellation_cache.h" - -#include "acceln.h" -#include "geometry.h" - -namespace embree -{ - /*! Base class all scenes are derived from */ - class Scene : public AccelN - { - ALIGNED_CLASS_(std::alignment_of<Scene>::value); - - public: - template<typename Ty, bool mblur = false> - class Iterator - { - public: - Iterator () {} - - Iterator (Scene* scene, bool all = false) - : scene(scene), all(all) {} - - __forceinline Ty* at(const size_t i) - { - Geometry* geom = scene->geometries[i].ptr; - if (geom == nullptr) return nullptr; - if (!all && !geom->isEnabled()) return nullptr; - const size_t mask = geom->getTypeMask() & Ty::geom_type; - if (!(mask)) return nullptr; - if ((geom->numTimeSteps != 1) != mblur) return nullptr; - return (Ty*) geom; - } - - __forceinline Ty* operator[] (const size_t i) { - return at(i); - } - - __forceinline size_t size() const { - return scene->size(); - } - - __forceinline size_t numPrimitives() const { - return scene->getNumPrimitives(Ty::geom_type,mblur); - } - - __forceinline size_t maxPrimitivesPerGeometry() - { - size_t ret = 0; - for (size_t i=0; i<scene->size(); i++) { - Ty* mesh = at(i); - if (mesh == nullptr) continue; - ret = max(ret,mesh->size()); - } - return ret; - } - - __forceinline unsigned int maxGeomID() - { - unsigned int ret = 0; - for (size_t i=0; i<scene->size(); i++) { - Ty* mesh = at(i); - if (mesh == nullptr) continue; - ret = max(ret,(unsigned int)i); - } - return ret; - } - - __forceinline unsigned maxTimeStepsPerGeometry() - { - unsigned ret = 0; - for (size_t i=0; i<scene->size(); i++) { - Ty* mesh = at(i); - if (mesh == nullptr) continue; - ret = max(ret,mesh->numTimeSteps); - } - return ret; - } - - private: - Scene* scene; - bool all; - }; - - class Iterator2 - { - public: - Iterator2 () {} - - Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur) - : scene(scene), typemask(typemask), mblur(mblur) {} - - __forceinline Geometry* at(const size_t i) - { - Geometry* geom = scene->geometries[i].ptr; - if (geom == nullptr) return nullptr; - if (!geom->isEnabled()) return nullptr; - if (!(geom->getTypeMask() & typemask)) return nullptr; - if ((geom->numTimeSteps != 1) != mblur) return nullptr; - return geom; - } - - __forceinline Geometry* operator[] (const size_t i) { - return at(i); - } - - __forceinline size_t size() const { - return scene->size(); - } - - private: - Scene* scene; - Geometry::GTypeMask typemask; - bool mblur; - }; - - public: - - /*! Scene construction */ - Scene (Device* device); - - /*! Scene destruction */ - ~Scene () noexcept; - - private: - /*! class is non-copyable */ - Scene (const Scene& other) DELETED; // do not implement - Scene& operator= (const Scene& other) DELETED; // do not implement - - public: - void createTriangleAccel(); - void createTriangleMBAccel(); - void createQuadAccel(); - void createQuadMBAccel(); - void createHairAccel(); - void createHairMBAccel(); - void createSubdivAccel(); - void createSubdivMBAccel(); - void createUserGeometryAccel(); - void createUserGeometryMBAccel(); - void createInstanceAccel(); - void createInstanceMBAccel(); - void createInstanceExpensiveAccel(); - void createInstanceExpensiveMBAccel(); - void createGridAccel(); - void createGridMBAccel(); - - /*! prints statistics about the scene */ - void printStatistics(); - - /*! clears the scene */ - void clear(); - - /*! detaches some geometry */ - void detachGeometry(size_t geomID); - - void setBuildQuality(RTCBuildQuality quality_flags); - RTCBuildQuality getBuildQuality() const; - - void setSceneFlags(RTCSceneFlags scene_flags); - RTCSceneFlags getSceneFlags() const; - - void commit (bool join); - void commit_task (); - void build () {} - - void updateInterface(); - - /* return number of geometries */ - __forceinline size_t size() const { return geometries.size(); } - - /* bind geometry to the scene */ - unsigned int bind (unsigned geomID, Ref<Geometry> geometry); - - /* determines if scene is modified */ - __forceinline bool isModified() const { return modified; } - - /* sets modified flag */ - __forceinline void setModified(bool f = true) { - modified = f; - } - - __forceinline bool isGeometryModified(size_t geomID) - { - Ref<Geometry>& g = geometries[geomID]; - if (!g) return false; - return g->getModCounter() > geometryModCounters_[geomID]; - } - - protected: - - __forceinline void checkIfModifiedAndSet () - { - if (isModified ()) return; - - auto geometryIsModified = [this](size_t geomID)->bool { - return isGeometryModified(geomID); - }; - - if (parallel_any_of (size_t(0), geometries.size (), geometryIsModified)) { - setModified (); - } - } - - public: - - /* get mesh by ID */ - __forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; } - __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; } - - template<typename Mesh> - __forceinline Mesh* get(size_t i) { - assert(i < geometries.size()); - assert(geometries[i]->getTypeMask() & Mesh::geom_type); - return (Mesh*)geometries[i].ptr; - } - template<typename Mesh> - __forceinline const Mesh* get(size_t i) const { - assert(i < geometries.size()); - assert(geometries[i]->getTypeMask() & Mesh::geom_type); - return (Mesh*)geometries[i].ptr; - } - - template<typename Mesh> - __forceinline Mesh* getSafe(size_t i) { - assert(i < geometries.size()); - if (geometries[i] == null) return nullptr; - if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr; - else return (Mesh*) geometries[i].ptr; - } - - __forceinline Ref<Geometry> get_locked(size_t i) { - Lock<SpinLock> lock(geometriesMutex); - assert(i < geometries.size()); - return geometries[i]; - } - - /* flag decoding */ - __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); } - __forceinline bool isCompactAccel() const { return scene_flags & RTC_SCENE_FLAG_COMPACT; } - __forceinline bool isRobustAccel() const { return scene_flags & RTC_SCENE_FLAG_ROBUST; } - __forceinline bool isStaticAccel() const { return !(scene_flags & RTC_SCENE_FLAG_DYNAMIC); } - __forceinline bool isDynamicAccel() const { return scene_flags & RTC_SCENE_FLAG_DYNAMIC; } - - __forceinline bool hasContextFilterFunction() const { - return scene_flags & RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION; - } - - __forceinline bool hasGeometryFilterFunction() { - return world.numFilterFunctions != 0; - } - - __forceinline bool hasFilterFunction() { - return hasContextFilterFunction() || hasGeometryFilterFunction(); - } - - /* test if scene got already build */ - __forceinline bool isBuild() const { return is_build; } - - public: - IDPool<unsigned,0xFFFFFFFE> id_pool; - vector<Ref<Geometry>> geometries; //!< list of all user geometries - vector<unsigned int> geometryModCounters_; - vector<float*> vertices; - - public: - Device* device; - - /* these are to detect if we need to recreate the acceleration structures */ - bool flags_modified; - unsigned int enabled_geometry_types; - - RTCSceneFlags scene_flags; - RTCBuildQuality quality_flags; - MutexSys buildMutex; - SpinLock geometriesMutex; - bool is_build; - private: - bool modified; //!< true if scene got modified - - public: - - /*! global lock step task scheduler */ -#if defined(TASKING_INTERNAL) - MutexSys schedulerMutex; - Ref<TaskScheduler> scheduler; -#elif defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION - tbb::isolated_task_group group; -#elif defined(TASKING_TBB) - tbb::task_group group; -#elif defined(TASKING_PPL) - concurrency::task_group group; -#endif - - public: - struct BuildProgressMonitorInterface : public BuildProgressMonitor { - BuildProgressMonitorInterface(Scene* scene) - : scene(scene) {} - void operator() (size_t dn) const { scene->progressMonitor(double(dn)); } - private: - Scene* scene; - }; - BuildProgressMonitorInterface progressInterface; - RTCProgressMonitorFunction progress_monitor_function; - void* progress_monitor_ptr; - std::atomic<size_t> progress_monitor_counter; - void progressMonitor(double nprims); - void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr); - - private: - GeometryCounts world; //!< counts for geometry - - public: - - __forceinline size_t numPrimitives() const { - return world.size(); - } - - __forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const - { - size_t count = 0; - - if (mask & Geometry::MTY_TRIANGLE_MESH) - count += mblur ? world.numMBTriangles : world.numTriangles; - - if (mask & Geometry::MTY_QUAD_MESH) - count += mblur ? world.numMBQuads : world.numQuads; - - if (mask & Geometry::MTY_CURVE2) - count += mblur ? world.numMBLineSegments : world.numLineSegments; - - if (mask & Geometry::MTY_CURVE4) - count += mblur ? world.numMBBezierCurves : world.numBezierCurves; - - if (mask & Geometry::MTY_POINTS) - count += mblur ? world.numMBPoints : world.numPoints; - - if (mask & Geometry::MTY_SUBDIV_MESH) - count += mblur ? world.numMBSubdivPatches : world.numSubdivPatches; - - if (mask & Geometry::MTY_USER_GEOMETRY) - count += mblur ? world.numMBUserGeometries : world.numUserGeometries; - - if (mask & Geometry::MTY_INSTANCE_CHEAP) - count += mblur ? world.numMBInstancesCheap : world.numInstancesCheap; - - if (mask & Geometry::MTY_INSTANCE_EXPENSIVE) - count += mblur ? world.numMBInstancesExpensive : world.numInstancesExpensive; - - if (mask & Geometry::MTY_GRID_MESH) - count += mblur ? world.numMBGrids : world.numGrids; - - return count; - } - - template<typename Mesh, bool mblur> - __forceinline unsigned getNumTimeSteps() - { - if (!mblur) - return 1; - - Scene::Iterator<Mesh,mblur> iter(this); - return iter.maxTimeStepsPerGeometry(); - } - - template<typename Mesh, bool mblur> - __forceinline unsigned int getMaxGeomID() - { - Scene::Iterator<Mesh,mblur> iter(this); - return iter.maxGeomID(); - } - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_curves.h b/thirdparty/embree-aarch64/kernels/common/scene_curves.h deleted file mode 100644 index 2649ab0e3e..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_curves.h +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! represents an array of bicubic bezier curves */ - struct CurveGeometry : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4; - - public: - - /*! bezier curve construction */ - CurveGeometry (Device* device, Geometry::GType gtype); - - public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void setTessellationRate(float N); - void setMaxRadiusScale(float s); - void addElementsToCount (GeometryCounts & counts) const; - - public: - - /*! returns the number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns the i'th curve */ - __forceinline const unsigned int& curve(size_t i) const { - return curves[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline Vec3ff vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th normal of the first time step */ - __forceinline Vec3fa normal(size_t i) const { - return normals0[i]; - } - - /*! returns i'th tangent of the first time step */ - __forceinline Vec3ff tangent(size_t i) const { - return tangents0[i]; - } - - /*! returns i'th normal derivative of the first time step */ - __forceinline Vec3fa dnormal(size_t i) const { - return dnormals0[i]; - } - - /*! returns i'th radius of the first time step */ - __forceinline float radius(size_t i) const { - return vertices0[i].w; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline Vec3ff vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th normal of itime'th timestep */ - __forceinline Vec3fa normal(size_t i, size_t itime) const { - return normals[itime][i]; - } - - /*! returns i'th tangent of itime'th timestep */ - __forceinline Vec3ff tangent(size_t i, size_t itime) const { - return tangents[itime][i]; - } - - /*! returns i'th normal derivative of itime'th timestep */ - __forceinline Vec3fa dnormal(size_t i, size_t itime) const { - return dnormals[itime][i]; - } - - /*! returns i'th radius of itime'th timestep */ - __forceinline float radius(size_t i, size_t itime) const { - return vertices[itime][i].w; - } - - /*! gathers the curve starting with i'th vertex */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const - { - p0 = vertex(i+0); - p1 = vertex(i+1); - p2 = vertex(i+2); - p3 = vertex(i+3); - } - - /*! gathers the curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const - { - p0 = vertex(i+0,itime); - p1 = vertex(i+1,itime); - p2 = vertex(i+2,itime); - p3 = vertex(i+3,itime); - } - - /*! gathers the curve starting with i'th vertex */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const - { - p0 = vertex(i+0); - p1 = vertex(i+1); - p2 = vertex(i+2); - p3 = vertex(i+3); - n0 = normal(i+0); - n1 = normal(i+1); - n2 = normal(i+2); - n3 = normal(i+3); - } - - /*! gathers the curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const - { - p0 = vertex(i+0,itime); - p1 = vertex(i+1,itime); - p2 = vertex(i+2,itime); - p3 = vertex(i+3,itime); - n0 = normal(i+0,itime); - n1 = normal(i+1,itime); - n2 = normal(i+2,itime); - n3 = normal(i+3,itime); - } - - /*! prefetches the curve starting with i'th vertex of itime'th timestep */ - __forceinline void prefetchL1_vertices(size_t i) const - { - prefetchL1(vertices0.getPtr(i)+0); - prefetchL1(vertices0.getPtr(i)+64); - } - - /*! prefetches the curve starting with i'th vertex of itime'th timestep */ - __forceinline void prefetchL2_vertices(size_t i) const - { - prefetchL2(vertices0.getPtr(i)+0); - prefetchL2(vertices0.getPtr(i)+64); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - - const float t0 = 1.0f - ftime; - const float t1 = ftime; - Vec3ff a0,a1,a2,a3; - gather(a0,a1,a2,a3,i,itime); - Vec3ff b0,b1,b2,b3; - gather(b0,b1,b2,b3,i,itime+1); - p0 = madd(Vec3ff(t0),a0,t1*b0); - p1 = madd(Vec3ff(t0),a1,t1*b1); - p2 = madd(Vec3ff(t0),a2,t1*b2); - p3 = madd(Vec3ff(t0),a3,t1*b3); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - - const float t0 = 1.0f - ftime; - const float t1 = ftime; - Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3; - gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime); - Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3; - gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1); - p0 = madd(Vec3ff(t0),a0,t1*b0); - p1 = madd(Vec3ff(t0),a1,t1*b1); - p2 = madd(Vec3ff(t0),a2,t1*b2); - p3 = madd(Vec3ff(t0),a3,t1*b3); - n0 = madd(Vec3ff(t0),an0,t1*bn0); - n1 = madd(Vec3ff(t0),an1,t1*bn1); - n2 = madd(Vec3ff(t0),an2,t1*bn2); - n3 = madd(Vec3ff(t0),an3,t1*bn3); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const - { - Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3; - unsigned int vertexID = curve(primID); - gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime); - SourceCurve3ff ccurve(v0,v1,v2,v3); - SourceCurve3fa ncurve(n0,n1,n2,n3); - ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); - return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0); - const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1); - return clerp(curve0,curve1,ftime); - } - - /*! gathers the hermite curve starting with i'th vertex */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const - { - p0 = vertex (i+0); - p1 = vertex (i+1); - t0 = tangent(i+0); - t1 = tangent(i+1); - } - - /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const - { - p0 = vertex (i+0,itime); - p1 = vertex (i+1,itime); - t0 = tangent(i+0,itime); - t1 = tangent(i+1,itime); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const float f0 = 1.0f - ftime, f1 = ftime; - Vec3ff ap0,at0,ap1,at1; - gather_hermite(ap0,at0,ap1,at1,i,itime); - Vec3ff bp0,bt0,bp1,bt1; - gather_hermite(bp0,bt0,bp1,bt1,i,itime+1); - p0 = madd(Vec3ff(f0),ap0,f1*bp0); - t0 = madd(Vec3ff(f0),at0,f1*bt0); - p1 = madd(Vec3ff(f0),ap1,f1*bp1); - t1 = madd(Vec3ff(f0),at1,f1*bt1); - } - - /*! gathers the hermite curve starting with i'th vertex */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const - { - p0 = vertex (i+0); - p1 = vertex (i+1); - t0 = tangent(i+0); - t1 = tangent(i+1); - n0 = normal(i+0); - n1 = normal(i+1); - dn0 = dnormal(i+0); - dn1 = dnormal(i+1); - } - - /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const - { - p0 = vertex (i+0,itime); - p1 = vertex (i+1,itime); - t0 = tangent(i+0,itime); - t1 = tangent(i+1,itime); - n0 = normal(i+0,itime); - n1 = normal(i+1,itime); - dn0 = dnormal(i+0,itime); - dn1 = dnormal(i+1,itime); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3fa& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3fa& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const float f0 = 1.0f - ftime, f1 = ftime; - Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1; - gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime); - Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1; - gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1); - p0 = madd(Vec3ff(f0),ap0,f1*bp0); - t0 = madd(Vec3ff(f0),at0,f1*bt0); - n0 = madd(Vec3ff(f0),an0,f1*bn0); - dn0= madd(Vec3ff(f0),adn0,f1*bdn0); - p1 = madd(Vec3ff(f0),ap1,f1*bp1); - t1 = madd(Vec3ff(f0),at1,f1*bt1); - n1 = madd(Vec3ff(f0),an1,f1*bn1); - dn1= madd(Vec3ff(f0),adn1,f1*bdn1); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const - { - Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1; - unsigned int vertexID = curve(primID); - gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime); - - SourceCurve3ff ccurve(v0,t0,v1,t1); - SourceCurve3fa ncurve(n0,dn0,n1,dn1); - ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); - return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0); - const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1); - return clerp(curve0,curve1,ftime); - } - - private: - void resizeBuffers(unsigned int numSteps); - - public: - BufferView<unsigned int> curves; //!< array of curve indices - BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer - BufferView<Vec3fa> normals0; //!< fast access to first normal buffer - BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer - BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer - vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep - vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep - vector<BufferView<Vec3ff>> tangents; //!< tangent array for each timestep - vector<BufferView<Vec3fa>> dnormals; //!< normal derivative array for each timestep - BufferView<char> flags; //!< start, end flag per segment - vector<BufferView<char>> vertexAttribs; //!< user buffers - int tessellationRate; //!< tessellation rate for flat curve - float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii - }; - - DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h deleted file mode 100644 index c08658466a..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! Grid Mesh */ - struct GridMesh : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH; - - /*! grid */ - struct Grid - { - unsigned int startVtxID; - unsigned int lineVtxOffset; - unsigned short resX,resY; - - /* border flags due to 3x3 vertex pattern */ - __forceinline unsigned int get3x3FlagsX(const unsigned int x) const - { - return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0; - } - - /* border flags due to 3x3 vertex pattern */ - __forceinline unsigned int get3x3FlagsY(const unsigned int y) const - { - return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0; - } - - /*! outputs grid structure */ - __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) { - return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }"; - } - }; - - public: - - /*! grid mesh construction */ - GridMesh (Device* device); - - /* geometry interface */ - public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void interpolate(const RTCInterpolateArguments* const args); - void addElementsToCount (GeometryCounts & counts) const; - - __forceinline unsigned int getNumSubGrids(const size_t gridID) - { - const Grid &g = grid(gridID); - return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1)); - } - - /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { - return (float*) vertices0.getPtr(); - } - - public: - - /*! returns number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns i'th grid*/ - __forceinline const Grid& grid(size_t i) const { - return grids[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load - return vertices0[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const char* vertexPtr(size_t i) const { - return vertices0.getPtr(i); - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const Vec3fa vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i, size_t itime) const { - return vertices[itime].getPtr(i); - } - - /*! returns i'th vertex of the first timestep */ - __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const { - assert(x < (size_t)g.resX); - assert(y < (size_t)g.resY); - return g.startVtxID + x + y * g.lineVtxOffset; - } - - /*! returns i'th vertex of the first timestep */ - __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const { - const size_t index = grid_vertex_index(g,x,y); - return vertex(index); - } - - /*! returns i'th vertex of the itime'th timestep */ - __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const { - const size_t index = grid_vertex_index(g,x,y); - return vertex(index,itime); - } - - /*! calculates the build bounds of the i'th primitive, if it's valid */ - __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const - { - BBox3fa b(empty); - for (size_t t=0; t<numTimeSteps; t++) - { - for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) - for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) - { - const Vec3fa v = grid_vertex(g,x,y,t); - if (unlikely(!isvalid(v))) return false; - b.extend(v); - } - } - - bbox = b; - return true; - } - - /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const - { - assert(itime < numTimeSteps); - BBox3fa b0(empty); - for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) - for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) - { - const Vec3fa v = grid_vertex(g,x,y,itime); - if (unlikely(!isvalid(v))) return false; - b0.extend(v); - } - - /* use bounds of first time step in builder */ - bbox = b0; - return true; - } - - __forceinline bool valid(size_t gridID, size_t itime=0) const { - return valid(gridID, make_range(itime, itime)); - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const - { - if (unlikely(gridID >= grids.size())) return false; - const Grid &g = grid(gridID); - if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false; - if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false; - - for (size_t y=0;y<g.resY;y++) - for (size_t x=0;x<g.resX;x++) - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - if (!isvalid(grid_vertex(g,x,y,itime))) return false; - return true; - } - - - __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const - { - BBox3fa box(empty); - buildBounds(g,sx,sy,itime,box); - return box; - } - - __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const { - BBox3fa bounds0, bounds1; - buildBounds(g,sx,sy,itime+0,bounds0); - buildBounds(g,sx,sy,itime+1,bounds1); - return LBBox3fa(bounds0,bounds1); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments); - } - - public: - BufferView<Grid> grids; //!< array of triangles - BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer - vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep - vector<RawBufferView> vertexAttribs; //!< vertex attributes - }; - - namespace isa - { - struct GridMeshISA : public GridMesh - { - GridMeshISA (Device* device) - : GridMesh(device) {} - }; - } - - DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_instance.h b/thirdparty/embree-aarch64/kernels/common/scene_instance.h deleted file mode 100644 index 7ff82a4fb8..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_instance.h +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "geometry.h" -#include "accel.h" - -namespace embree -{ - struct MotionDerivativeCoefficients; - - /*! Instanced acceleration structure */ - struct Instance : public Geometry - { - ALIGNED_STRUCT_(16); - static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE; - - public: - Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1); - ~Instance(); - - private: - Instance (const Instance& other) DELETED; // do not implement - Instance& operator= (const Instance& other) DELETED; // do not implement - - private: - LBBox3fa nonlinearBounds(const BBox1f& time_range_in, - const BBox1f& geom_time_range, - float geom_time_segments) const; - - BBox3fa boundSegment(size_t itime, - BBox3fa const& obbox0, BBox3fa const& obbox1, - BBox3fa const& bbox0, BBox3fa const& bbox1, - float t_min, float t_max) const; - - /* calculates the (correct) interpolated bounds */ - __forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const - { - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return xfmBounds(slerp(local2world[itime0], local2world[itime1], f), - lerp(getObjectBounds(itime0), getObjectBounds(itime1), f)); - return xfmBounds(lerp(local2world[itime0], local2world[itime1], f), - lerp(getObjectBounds(itime0), getObjectBounds(itime1), f)); - } - - public: - virtual void setNumTimeSteps (unsigned int numTimeSteps) override; - virtual void setInstancedScene(const Ref<Scene>& scene) override; - virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override; - virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override; - virtual AffineSpace3fa getTransform(float time) override; - virtual void setMask (unsigned mask) override; - virtual void build() {} - virtual void addElementsToCount (GeometryCounts & counts) const override; - virtual void commit() override; - - public: - - /*! calculates the bounds of instance */ - __forceinline BBox3fa bounds(size_t i) const { - assert(i == 0); - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object->bounds.bounds()); - return xfmBounds(local2world[0],object->bounds.bounds()); - } - - /*! gets the bounds of the instanced scene */ - __forceinline BBox3fa getObjectBounds(size_t itime) const { - return object->getBounds(timeStep(itime)); - } - - /*! calculates the bounds of instance */ - __forceinline BBox3fa bounds(size_t i, size_t itime) const { - assert(i == 0); - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),getObjectBounds(itime)); - return xfmBounds(local2world[itime],getObjectBounds(itime)); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const { - assert(i == 0); - LBBox3fa lbbox = nonlinearBounds(dt, time_range, fnumTimeSegments); - return lbbox; - } - - /*! calculates the build bounds of the i'th item, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const - { - assert(i==0); - const BBox3fa b = bounds(i); - if (bbox) *bbox = b; - return isvalid(b); - } - - /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - assert(i==0); - const LBBox3fa bounds = linearBounds(i,itime); - bbox = bounds.bounds (); - return isvalid(bounds); - } - - /* gets version info of topology */ - unsigned int getTopologyVersion() const { - return numPrimitives; - } - - /* returns true if topology changed */ - bool topologyChanged(unsigned int otherVersion) const { - return numPrimitives != otherVersion; - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - assert(i == 0); - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - if (!isvalid(bounds(i,itime))) return false; - - return true; - } - - __forceinline AffineSpace3fa getLocal2World() const - { - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return quaternionDecompositionToAffineSpace(local2world[0]); - return local2world[0]; - } - - __forceinline AffineSpace3fa getLocal2World(float t) const - { - float ftime; const unsigned int itime = timeSegment(t, ftime); - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return slerp(local2world[itime+0],local2world[itime+1],ftime); - return lerp(local2world[itime+0],local2world[itime+1],ftime); - } - - __forceinline AffineSpace3fa getWorld2Local() const { - return world2local0; - } - - __forceinline AffineSpace3fa getWorld2Local(float t) const { - return rcp(getLocal2World(t)); - } - - template<int K> - __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const - { - if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return getWorld2LocalSlerp(valid, t); - return getWorld2LocalLerp(valid, t); - } - - private: - - template<int K> - __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const - { - vfloat<K> ftime; - const vint<K> itime_k = timeSegment(t, ftime); - assert(any(valid)); - const size_t index = bsf(movemask(valid)); - const int itime = itime_k[index]; - if (likely(all(valid, itime_k == vint<K>(itime)))) { - return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]), - AffineSpace3vff<K>(local2world[itime+1]), - ftime)); - } - else { - AffineSpace3vff<K> space0,space1; - vbool<K> valid1 = valid; - while (any(valid1)) { - vbool<K> valid2; - const int itime = next_unique(valid1, itime_k, valid2); - space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0); - space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1); - } - return rcp(slerp(space0, space1, ftime)); - } - } - - template<int K> - __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const - { - vfloat<K> ftime; - const vint<K> itime_k = timeSegment(t, ftime); - assert(any(valid)); - const size_t index = bsf(movemask(valid)); - const int itime = itime_k[index]; - if (likely(all(valid, itime_k == vint<K>(itime)))) { - return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), - AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), - ftime)); - } else { - AffineSpace3vf<K> space0,space1; - vbool<K> valid1 = valid; - while (any(valid1)) { - vbool<K> valid2; - const int itime = next_unique(valid1, itime_k, valid2); - space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0); - space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1); - } - return rcp(lerp(space0, space1, ftime)); - } - } - - public: - Accel* object; //!< pointer to instanced acceleration structure - AffineSpace3ff* local2world; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition) - AffineSpace3fa world2local0; //!< transformation from world space to local space for timestep 0 - }; - - namespace isa - { - struct InstanceISA : public Instance - { - InstanceISA (Device* device) - : Instance(device) {} - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - assert(r.begin() == 0); - assert(r.end() == 1); - - PrimInfo pinfo(empty); - BBox3fa b = empty; - if (!buildBounds(0,&b)) return pinfo; - // const BBox3fa b = bounds(0); - // if (!isvalid(b)) return pinfo; - - const PrimRef prim(b,geomID,unsigned(0)); - pinfo.add_center2(prim); - prims[k++] = prim; - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - assert(r.begin() == 0); - assert(r.end() == 1); - - PrimInfo pinfo(empty); - BBox3fa b = empty; - if (!buildBounds(0,&b)) return pinfo; - // if (!valid(0,range<size_t>(itime))) return pinfo; - // const PrimRef prim(linearBounds(0,itime).bounds(),geomID,unsigned(0)); - const PrimRef prim(b,geomID,unsigned(0)); - pinfo.add_center2(prim); - prims[k++] = prim; - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const - { - assert(r.begin() == 0); - assert(r.end() == 1); - - PrimInfoMB pinfo(empty); - if (!valid(0, timeSegmentRange(t0t1))) return pinfo; - const PrimRefMB prim(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0)); - pinfo.add_primref(prim); - prims[k++] = prim; - return pinfo; - } - }; - } - - DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h b/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h deleted file mode 100644 index c0f9ee8f77..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! represents an array of line segments */ - struct LineSegments : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2; - - public: - - /*! line segments construction */ - LineSegments (Device* device, Geometry::GType gtype); - - public: - void setMask (unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify (); - void interpolate(const RTCInterpolateArguments* const args); - void setTessellationRate(float N); - void setMaxRadiusScale(float s); - void addElementsToCount (GeometryCounts & counts) const; - - public: - - /*! returns the number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns the i'th segment */ - __forceinline const unsigned int& segment(size_t i) const { - return segments[i]; - } - - /*! returns the segment to the left of the i'th segment */ - __forceinline bool segmentLeftExists(size_t i) const { - assert (flags); - return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0; - } - - /*! returns the segment to the right of the i'th segment */ - __forceinline bool segmentRightExists(size_t i) const { - assert (flags); - return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0; - } - - /*! returns i'th vertex of the first time step */ - __forceinline Vec3ff vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const char* vertexPtr(size_t i) const { - return vertices0.getPtr(i); - } - - /*! returns i'th normal of the first time step */ - __forceinline Vec3fa normal(size_t i) const { - return normals0[i]; - } - - /*! returns i'th radius of the first time step */ - __forceinline float radius(size_t i) const { - return vertices0[i].w; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline Vec3ff vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i, size_t itime) const { - return vertices[itime].getPtr(i); - } - - /*! returns i'th normal of itime'th timestep */ - __forceinline Vec3fa normal(size_t i, size_t itime) const { - return normals[itime][i]; - } - - /*! returns i'th radius of itime'th timestep */ - __forceinline float radius(size_t i, size_t itime) const { - return vertices[itime][i].w; - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const - { - const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1)); - return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w))); - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(size_t i) const - { - const unsigned int index = segment(i); - const Vec3ff v0 = vertex(index+0); - const Vec3ff v1 = vertex(index+1); - return bounds(v0,v1); - } - - /*! calculates bounding box of i'th line segment for the itime'th time step */ - __forceinline BBox3fa bounds(size_t i, size_t itime) const - { - const unsigned int index = segment(i); - const Vec3ff v0 = vertex(index+0,itime); - const Vec3ff v1 = vertex(index+1,itime); - return bounds(v0,v1); - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const - { - const unsigned int index = segment(i); - const Vec3ff v0 = vertex(index+0); - const Vec3ff v1 = vertex(index+1); - const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w); - const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w); - return bounds(w0,w1); - } - - /*! calculates bounding box of i'th line segment for the itime'th time step */ - __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const - { - const unsigned int index = segment(i); - const Vec3ff v0 = vertex(index+0,itime); - const Vec3ff v1 = vertex(index+1,itime); - const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w); - const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w); - return bounds(w0,w1); - } - - /*! check if the i'th primitive is valid at the itime'th timestep */ - __forceinline bool valid(size_t i, size_t itime) const { - return valid(i, make_range(itime, itime)); - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - const unsigned int index = segment(i); - if (index+1 >= numVertices()) return false; - - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - { - const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false; - const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false; - if (min(v0.w,v1.w) < 0.0f) return false; - } - return true; - } - - /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */ - __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const { - return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1)); - } - - /*! calculates the build bounds of the i'th primitive, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const - { - if (!valid(i,0)) return false; - *bbox = bounds(i); - return true; - } - - /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - if (!valid(i,itime+0) || !valid(i,itime+1)) return false; - bbox = bounds(i,itime); // use bounds of first time step in builder - return true; - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const - { - if (!valid(i, timeSegmentRange(time_range))) return false; - bbox = linearBounds(i, time_range); - return true; - } - - /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { - return (float*) vertices0.getPtr(); - } - - public: - BufferView<unsigned int> segments; //!< array of line segment indices - BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer - BufferView<Vec3fa> normals0; //!< fast access to first normal buffer - BufferView<char> flags; //!< start, end flag per segment - vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep - vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep - vector<BufferView<char>> vertexAttribs; //!< user buffers - int tessellationRate; //!< tessellation rate for bezier curve - float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii - }; - - namespace isa - { - struct LineSegmentsISA : public LineSegments - { - LineSegmentsISA (Device* device, Geometry::GType gtype) - : LineSegments(device,gtype) {} - - Vec3fa computeDirection(unsigned int primID) const - { - const unsigned vtxID = segment(primID); - const Vec3fa v0 = vertex(vtxID+0); - const Vec3fa v1 = vertex(vtxID+1); - return v1-v0; - } - - Vec3fa computeDirection(unsigned int primID, size_t time) const - { - const unsigned vtxID = segment(primID); - const Vec3fa v0 = vertex(vtxID+0,time); - const Vec3fa v1 = vertex(vtxID+1,time); - return v1-v0; - } - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,itime,bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfoMB pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!valid(j, timeSegmentRange(t0t1))) continue; - const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j)); - pinfo.add_primref(prim); - prims[k++] = prim; - } - return pinfo; - } - - BBox3fa vbounds(size_t i) const { - return bounds(i); - } - - BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const { - return bounds(space,i); - } - - LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const { - return linearBounds(primID,time_range); - } - - LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const { - return linearBounds(space,primID,time_range); - } - }; - } - - DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_points.h b/thirdparty/embree-aarch64/kernels/common/scene_points.h deleted file mode 100644 index 1d39ed07ba..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_points.h +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "buffer.h" -#include "default.h" -#include "geometry.h" - -namespace embree -{ - /*! represents an array of points */ - struct Points : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS; - - public: - /*! line segments construction */ - Points(Device* device, Geometry::GType gtype); - - public: - void setMask(unsigned mask); - void setNumTimeSteps(unsigned int numTimeSteps); - void setVertexAttributeCount(unsigned int N); - void setBuffer(RTCBufferType type, - unsigned int slot, - RTCFormat format, - const Ref<Buffer>& buffer, - size_t offset, - size_t stride, - unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void setMaxRadiusScale(float s); - void addElementsToCount (GeometryCounts & counts) const; - - public: - /*! returns the number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns i'th vertex of the first time step */ - __forceinline Vec3ff vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const char* vertexPtr(size_t i) const { - return vertices0.getPtr(i); - } - - /*! returns i'th normal of the first time step */ - __forceinline Vec3fa normal(size_t i) const { - return normals0[i]; - } - - /*! returns i'th radius of the first time step */ - __forceinline float radius(size_t i) const { - return vertices0[i].w; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline Vec3ff vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i, size_t itime) const { - return vertices[itime].getPtr(i); - } - - /*! returns i'th normal of itime'th timestep */ - __forceinline Vec3fa normal(size_t i, size_t itime) const { - return normals[itime][i]; - } - - /*! returns i'th radius of itime'th timestep */ - __forceinline float radius(size_t i, size_t itime) const { - return vertices[itime][i].w; - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(const Vec3ff& v0) const { - return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w)); - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(size_t i) const - { - const Vec3ff v0 = vertex(i); - return bounds(v0); - } - - /*! calculates bounding box of i'th line segment for the itime'th time step */ - __forceinline BBox3fa bounds(size_t i, size_t itime) const - { - const Vec3ff v0 = vertex(i, itime); - return bounds(v0); - } - - /*! calculates bounding box of i'th line segment */ - __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const - { - const Vec3ff v0 = vertex(i); - const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w); - return bounds(w0); - } - - /*! calculates bounding box of i'th line segment for the itime'th time step */ - __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const - { - const Vec3ff v0 = vertex(i, itime); - const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w); - return bounds(w0); - } - - /*! check if the i'th primitive is valid at the itime'th timestep */ - __forceinline bool valid(size_t i, size_t itime) const { - return valid(i, make_range(itime, itime)); - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - const unsigned int index = (unsigned int)i; - if (index >= numVertices()) - return false; - - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) { - const Vec3ff v0 = vertex(index + 0, itime); - if (unlikely(!isvalid4(v0))) - return false; - if (v0.w < 0.0f) - return false; - } - return true; - } - - /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */ - __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const { - return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1)); - } - - /*! calculates the build bounds of the i'th primitive, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const - { - if (!valid(i, 0)) - return false; - *bbox = bounds(i); - return true; - } - - /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - if (!valid(i, itime + 0) || !valid(i, itime + 1)) - return false; - bbox = bounds(i, itime); // use bounds of first time step in builder - return true; - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const { - return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const { - return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const - { - if (!valid(i, timeSegmentRange(time_range))) return false; - bbox = linearBounds(i, time_range); - return true; - } - - /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { - return (float*) vertices0.getPtr(); - } - - public: - BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer - BufferView<Vec3fa> normals0; //!< fast access to first normal buffer - vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep - vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep - vector<BufferView<char>> vertexAttribs; //!< user buffers - float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii - }; - - namespace isa - { - struct PointsISA : public Points - { - PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {} - - Vec3fa computeDirection(unsigned int primID) const - { - return Vec3fa(1, 0, 0); - } - - Vec3fa computeDirection(unsigned int primID, size_t time) const - { - return Vec3fa(1, 0, 0); - } - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j = r.begin(); j < r.end(); j++) { - BBox3fa bounds = empty; - if (!buildBounds(j, &bounds)) - continue; - const PrimRef prim(bounds, geomID, unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j = r.begin(); j < r.end(); j++) { - BBox3fa bounds = empty; - if (!buildBounds(j, itime, bounds)) - continue; - const PrimRef prim(bounds, geomID, unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, - const BBox1f& t0t1, - const range<size_t>& r, - size_t k, - unsigned int geomID) const - { - PrimInfoMB pinfo(empty); - for (size_t j = r.begin(); j < r.end(); j++) { - if (!valid(j, timeSegmentRange(t0t1))) - continue; - const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j)); - pinfo.add_primref(prim); - prims[k++] = prim; - } - return pinfo; - } - - BBox3fa vbounds(size_t i) const - { - return bounds(i); - } - - BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const - { - return bounds(space, i); - } - - LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const - { - return linearBounds(primID, time_range); - } - - LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const - { - return linearBounds(space, primID, time_range); - } - }; - } // namespace isa - - DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType); -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h deleted file mode 100644 index d5bb054b14..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! Quad Mesh */ - struct QuadMesh : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH; - - /*! triangle indices */ - struct Quad - { - uint32_t v[4]; - - /*! outputs triangle indices */ - __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) { - return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }"; - } - }; - - public: - - /*! quad mesh construction */ - QuadMesh (Device* device); - - /* geometry interface */ - public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void interpolate(const RTCInterpolateArguments* const args); - void addElementsToCount (GeometryCounts & counts) const; - - public: - - /*! returns number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns i'th quad */ - __forceinline const Quad& quad(size_t i) const { - return quads[i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const Vec3fa vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i) const { - return vertices0.getPtr(i); - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const Vec3fa vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i, size_t itime) const { - return vertices[itime].getPtr(i); - } - - /*! calculates the bounds of the i'th quad */ - __forceinline BBox3fa bounds(size_t i) const - { - const Quad& q = quad(i); - const Vec3fa v0 = vertex(q.v[0]); - const Vec3fa v1 = vertex(q.v[1]); - const Vec3fa v2 = vertex(q.v[2]); - const Vec3fa v3 = vertex(q.v[3]); - return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3)); - } - - /*! calculates the bounds of the i'th quad at the itime'th timestep */ - __forceinline BBox3fa bounds(size_t i, size_t itime) const - { - const Quad& q = quad(i); - const Vec3fa v0 = vertex(q.v[0],itime); - const Vec3fa v1 = vertex(q.v[1],itime); - const Vec3fa v2 = vertex(q.v[2],itime); - const Vec3fa v3 = vertex(q.v[3],itime); - return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3)); - } - - /*! check if the i'th primitive is valid at the itime'th timestep */ - __forceinline bool valid(size_t i, size_t itime) const { - return valid(i, make_range(itime, itime)); - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - const Quad& q = quad(i); - if (unlikely(q.v[0] >= numVertices())) return false; - if (unlikely(q.v[1] >= numVertices())) return false; - if (unlikely(q.v[2] >= numVertices())) return false; - if (unlikely(q.v[3] >= numVertices())) return false; - - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - { - if (!isvalid(vertex(q.v[0],itime))) return false; - if (!isvalid(vertex(q.v[1],itime))) return false; - if (!isvalid(vertex(q.v[2],itime))) return false; - if (!isvalid(vertex(q.v[3],itime))) return false; - } - - return true; - } - - /*! calculates the linear bounds of the i'th quad at the itimeGlobal'th time segment */ - __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const { - return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1)); - } - - /*! calculates the build bounds of the i'th primitive, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const - { - const Quad& q = quad(i); - if (q.v[0] >= numVertices()) return false; - if (q.v[1] >= numVertices()) return false; - if (q.v[2] >= numVertices()) return false; - if (q.v[3] >= numVertices()) return false; - - for (unsigned int t=0; t<numTimeSteps; t++) - { - const Vec3fa v0 = vertex(q.v[0],t); - const Vec3fa v1 = vertex(q.v[1],t); - const Vec3fa v2 = vertex(q.v[2],t); - const Vec3fa v3 = vertex(q.v[3],t); - - if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3))) - return false; - } - - if (bbox) - *bbox = bounds(i); - - return true; - } - - /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - const Quad& q = quad(i); - if (unlikely(q.v[0] >= numVertices())) return false; - if (unlikely(q.v[1] >= numVertices())) return false; - if (unlikely(q.v[2] >= numVertices())) return false; - if (unlikely(q.v[3] >= numVertices())) return false; - - assert(itime+1 < numTimeSteps); - const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false; - const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false; - const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false; - const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false; - const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false; - const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false; - const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false; - const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false; - - /* use bounds of first time step in builder */ - bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3)); - return true; - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const - { - if (!valid(i, timeSegmentRange(dt))) return false; - bbox = linearBounds(i, dt); - return true; - } - - /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { - return (float*) vertices0.getPtr(); - } - - /* gets version info of topology */ - unsigned int getTopologyVersion() const { - return quads.modCounter; - } - - /* returns true if topology changed */ - bool topologyChanged(unsigned int otherVersion) const { - return quads.isModified(otherVersion); // || numPrimitivesChanged; - } - - /* returns the projected area */ - __forceinline float projectedPrimitiveArea(const size_t i) const { - const Quad& q = quad(i); - const Vec3fa v0 = vertex(q.v[0]); - const Vec3fa v1 = vertex(q.v[1]); - const Vec3fa v2 = vertex(q.v[2]); - const Vec3fa v3 = vertex(q.v[3]); - return areaProjectedTriangle(v0,v1,v3) + - areaProjectedTriangle(v1,v2,v3); - } - - public: - BufferView<Quad> quads; //!< array of quads - BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer - vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep - vector<BufferView<char>> vertexAttribs; //!< vertex attribute buffers - }; - - namespace isa - { - struct QuadMeshISA : public QuadMesh - { - QuadMeshISA (Device* device) - : QuadMesh(device) {} - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,itime,bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfoMB pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!valid(j, timeSegmentRange(t0t1))) continue; - const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j)); - pinfo.add_primref(prim); - prims[k++] = prim; - } - return pinfo; - } - }; - } - - DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h deleted file mode 100644 index d0246009db..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "geometry.h" -#include "buffer.h" -#include "../subdiv/half_edge.h" -#include "../subdiv/tessellation_cache.h" -#include "../subdiv/catmullclark_coefficients.h" -#include "../subdiv/patch.h" -#include "../../common/algorithms/parallel_map.h" -#include "../../common/algorithms/parallel_set.h" - -namespace embree -{ - class SubdivMesh : public Geometry - { - ALIGNED_CLASS_(16); - public: - - typedef HalfEdge::Edge Edge; - - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH; - - /*! structure used to sort half edges using radix sort by their key */ - struct KeyHalfEdge - { - KeyHalfEdge() {} - - KeyHalfEdge (uint64_t key, HalfEdge* edge) - : key(key), edge(edge) {} - - __forceinline operator uint64_t() const { - return key; - } - - friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) { - return e0.key < e1.key; - } - - public: - uint64_t key; - HalfEdge* edge; - }; - - public: - - /*! subdiv mesh construction */ - SubdivMesh(Device* device); - - public: - void setMask (unsigned mask); - void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode); - void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setTopologyCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void setTessellationRate(float N); - bool verify(); - void commit(); - void addElementsToCount (GeometryCounts & counts) const; - void setDisplacementFunction (RTCDisplacementFunctionN func); - unsigned int getFirstHalfEdge(unsigned int faceID); - unsigned int getFace(unsigned int edgeID); - unsigned int getNextHalfEdge(unsigned int edgeID); - unsigned int getPreviousHalfEdge(unsigned int edgeID); - unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID); - - public: - - /*! return the number of faces */ - size_t numFaces() const { - return faceVertices.size(); - } - - /*! return the number of edges */ - size_t numEdges() const { - return topology[0].vertexIndices.size(); - } - - /*! return the number of vertices */ - size_t numVertices() const { - return vertices[0].size(); - } - - /*! calculates the bounds of the i'th subdivision patch at the j'th timestep */ - __forceinline BBox3fa bounds(size_t i, size_t j = 0) const { - return topology[0].getHalfEdge(i)->bounds(vertices[j]); - } - - /*! check if the i'th primitive is valid */ - __forceinline bool valid(size_t i) const { - return topology[0].valid(i) && !invalidFace(i); - } - - /*! check if the i'th primitive is valid for the j'th time range */ - __forceinline bool valid(size_t i, size_t j) const { - return topology[0].valid(i) && !invalidFace(i,j); - } - - /*! prints some statistics */ - void printStatistics(); - - /*! initializes the half edge data structure */ - void initializeHalfEdgeStructures (); - - public: - - /*! returns the vertex buffer for some time step */ - __forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const { - return vertices[t]; - } - - /* returns tessellation level of edge */ - __forceinline float getEdgeLevel(const size_t i) const - { - if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level? - else return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level? - } - - public: - RTCDisplacementFunctionN displFunc; //!< displacement function - - /*! all buffers in this section are provided by the application */ - public: - - /*! the topology contains all data that may differ when - * interpolating different user data buffers */ - struct Topology - { - public: - - /*! Default topology construction */ - Topology () : halfEdges(nullptr,0) {} - - /*! Topology initialization */ - Topology (SubdivMesh* mesh); - - /*! make the class movable */ - public: - Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows - : mesh(std::move(other.mesh)), - vertexIndices(std::move(other.vertexIndices)), - subdiv_mode(std::move(other.subdiv_mode)), - halfEdges(std::move(other.halfEdges)), - halfEdges0(std::move(other.halfEdges0)), - halfEdges1(std::move(other.halfEdges1)) {} - - Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows - { - mesh = std::move(other.mesh); - vertexIndices = std::move(other.vertexIndices); - subdiv_mode = std::move(other.subdiv_mode); - halfEdges = std::move(other.halfEdges); - halfEdges0 = std::move(other.halfEdges0); - halfEdges1 = std::move(other.halfEdges1); - return *this; - } - - public: - /*! check if the i'th primitive is valid in this topology */ - __forceinline bool valid(size_t i) const - { - if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) { - if (getHalfEdge(i)->faceHasBorder()) return false; - } - return true; - } - - /*! updates the interpolation mode for the topology */ - void setSubdivisionMode (RTCSubdivisionMode mode); - - /*! marks all buffers as modified */ - void update (); - - /*! verifies index array */ - bool verify (size_t numVertices); - - /*! initializes the half edge data structure */ - void initializeHalfEdgeStructures (); - - private: - - /*! recalculates the half edges */ - void calculateHalfEdges(); - - /*! updates half edges when recalculation is not necessary */ - void updateHalfEdges(); - - /*! user input data */ - public: - - SubdivMesh* mesh; - - /*! indices of the vertices composing each face */ - BufferView<unsigned int> vertexIndices; - - /*! subdiv interpolation mode */ - RTCSubdivisionMode subdiv_mode; - - /*! generated data */ - public: - - /*! returns the start half edge for face f */ - __forceinline const HalfEdge* getHalfEdge ( const size_t f ) const { - return &halfEdges[mesh->faceStartEdge[f]]; - } - - /*! Half edge structure, generated by initHalfEdgeStructures */ - mvector<HalfEdge> halfEdges; - - /*! the following data is only required during construction of the - * half edge structure and can be cleared for static scenes */ - private: - - /*! two arrays used to sort the half edges */ - std::vector<KeyHalfEdge> halfEdges0; - std::vector<KeyHalfEdge> halfEdges1; - }; - - /*! returns the start half edge for topology t and face f */ - __forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const { - return topology[t].getHalfEdge(f); - } - - /*! buffer containing the number of vertices for each face */ - BufferView<unsigned int> faceVertices; - - /*! array of topologies */ - vector<Topology> topology; - - /*! vertex buffer (one buffer for each time step) */ - vector<BufferView<Vec3fa>> vertices; - - /*! user data buffers */ - vector<RawBufferView> vertexAttribs; - - /*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */ - BufferView<Edge> edge_creases; - - /*! edge crease weights for each edge of the edge_creases buffer */ - BufferView<float> edge_crease_weights; - - /*! vertex crease buffer containing all vertices that carry vertex crease weights */ - BufferView<unsigned int> vertex_creases; - - /*! vertex crease weights for each vertex of the vertex_creases buffer */ - BufferView<float> vertex_crease_weights; - - /*! subdivision level for each half edge of the vertexIndices buffer */ - BufferView<float> levels; - float tessellationRate; // constant rate that is used when levels is not set - - /*! buffer that marks specific faces as holes */ - BufferView<unsigned> holes; - - /*! all data in this section is generated by initializeHalfEdgeStructures function */ - private: - - /*! number of half edges used by faces */ - size_t numHalfEdges; - - /*! fast lookup table to find the first half edge for some face */ - mvector<uint32_t> faceStartEdge; - - /*! fast lookup table to find the face for some half edge */ - mvector<uint32_t> halfEdgeFace; - - /*! set with all holes */ - parallel_set<uint32_t> holeSet; - - /*! fast lookup table to detect invalid faces */ - mvector<int8_t> invalid_face; - - /*! test if face i is invalid in timestep j */ - __forceinline int8_t& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; } - __forceinline const int8_t& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; } - - /*! interpolation cache */ - public: - static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; } - static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; } - static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) { - const size_t slots = numInterpolationSlots4(stride); - assert(slot < slots); - return slots*prim+slot; - } - std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags; - std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags; - std::vector<Patch3fa::Ref> patch_eval_trees; - - /*! the following data is only required during construction of the - * half edge structure and can be cleared for static scenes */ - private: - - /*! map with all vertex creases */ - parallel_map<uint32_t,float> vertexCreaseMap; - - /*! map with all edge creases */ - parallel_map<uint64_t,float> edgeCreaseMap; - - protected: - - /*! counts number of geometry commits */ - size_t commitCounter; - }; - - namespace isa - { - struct SubdivMeshISA : public SubdivMesh - { - SubdivMeshISA (Device* device) - : SubdivMesh(device) {} - - void interpolate(const RTCInterpolateArguments* const args); - void interpolateN(const RTCInterpolateNArguments* const args); - }; - } - - DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*); -}; diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp deleted file mode 100644 index d1c2750f14..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "scene_triangle_mesh.h" -#include "scene.h" - -namespace embree -{ -#if defined(EMBREE_LOWEST_ISA) - - TriangleMesh::TriangleMesh (Device* device) - : Geometry(device,GTY_TRIANGLE_MESH,0,1) - { - vertices.resize(numTimeSteps); - } - - void TriangleMesh::setMask (unsigned mask) - { - this->mask = mask; - Geometry::update(); - } - - void TriangleMesh::setNumTimeSteps (unsigned int numTimeSteps) - { - vertices.resize(numTimeSteps); - Geometry::setNumTimeSteps(numTimeSteps); - } - - void TriangleMesh::setVertexAttributeCount (unsigned int N) - { - vertexAttribs.resize(N); - Geometry::update(); - } - - void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) - { - /* verify that all accesses are 4 bytes aligned */ - if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3)) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned"); - - if (type == RTC_BUFFER_TYPE_VERTEX) - { - if (format != RTC_FORMAT_FLOAT3) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex buffer format"); - - /* if buffer is larger than 16GB the premultiplied index optimization does not work */ - if (stride*num > 16ll*1024ll*1024ll*1024ll) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "vertex buffer can be at most 16GB large"); - - if (slot >= vertices.size()) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid vertex buffer slot"); - - vertices[slot].set(buffer, offset, stride, num, format); - vertices[slot].checkPadding16(); - vertices0 = vertices[0]; - } - else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) - { - if (format < RTC_FORMAT_FLOAT || format > RTC_FORMAT_FLOAT16) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer format"); - - if (slot >= vertexAttribs.size()) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer slot"); - - vertexAttribs[slot].set(buffer, offset, stride, num, format); - vertexAttribs[slot].checkPadding16(); - } - else if (type == RTC_BUFFER_TYPE_INDEX) - { - if (slot != 0) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - if (format != RTC_FORMAT_UINT3) - throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid index buffer format"); - - triangles.set(buffer, offset, stride, num, format); - setNumPrimitives(num); - } - else - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type"); - } - - void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot) - { - if (type == RTC_BUFFER_TYPE_INDEX) - { - if (slot != 0) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return triangles.getPtr(); - } - else if (type == RTC_BUFFER_TYPE_VERTEX) - { - if (slot >= vertices.size()) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return vertices[slot].getPtr(); - } - else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) - { - if (slot >= vertexAttribs.size()) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return vertexAttribs[slot].getPtr(); - } - else - { - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type"); - return nullptr; - } - } - - void TriangleMesh::updateBuffer(RTCBufferType type, unsigned int slot) - { - if (type == RTC_BUFFER_TYPE_INDEX) - { - if (slot != 0) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - triangles.setModified(); - } - else if (type == RTC_BUFFER_TYPE_VERTEX) - { - if (slot >= vertices.size()) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - vertices[slot].setModified(); - } - else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) - { - if (slot >= vertexAttribs.size()) - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - vertexAttribs[slot].setModified(); - } - else - { - throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type"); - } - - Geometry::update(); - } - - void TriangleMesh::commit() - { - /* verify that stride of all time steps are identical */ - for (unsigned int t=0; t<numTimeSteps; t++) - if (vertices[t].getStride() != vertices[0].getStride()) - throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step"); - - Geometry::commit(); - } - - void TriangleMesh::addElementsToCount (GeometryCounts & counts) const - { - if (numTimeSteps == 1) counts.numTriangles += numPrimitives; - else counts.numMBTriangles += numPrimitives; - } - - bool TriangleMesh::verify() - { - /*! verify size of vertex arrays */ - if (vertices.size() == 0) return false; - for (const auto& buffer : vertices) - if (buffer.size() != numVertices()) - return false; - - /*! verify size of user vertex arrays */ - for (const auto& buffer : vertexAttribs) - if (buffer.size() != numVertices()) - return false; - - /*! verify triangle indices */ - for (size_t i=0; i<size(); i++) { - if (triangles[i].v[0] >= numVertices()) return false; - if (triangles[i].v[1] >= numVertices()) return false; - if (triangles[i].v[2] >= numVertices()) return false; - } - - /*! verify vertices */ - for (const auto& buffer : vertices) - for (size_t i=0; i<buffer.size(); i++) - if (!isvalid(buffer[i])) - return false; - - return true; - } - - void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) - { - unsigned int primID = args->primID; - float u = args->u; - float v = args->v; - RTCBufferType bufferType = args->bufferType; - unsigned int bufferSlot = args->bufferSlot; - float* P = args->P; - float* dPdu = args->dPdu; - float* dPdv = args->dPdv; - float* ddPdudu = args->ddPdudu; - float* ddPdvdv = args->ddPdvdv; - float* ddPdudv = args->ddPdudv; - unsigned int valueCount = args->valueCount; - - /* calculate base pointer and stride */ - assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || - (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); - const char* src = nullptr; - size_t stride = 0; - if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { - src = vertexAttribs[bufferSlot].getPtr(); - stride = vertexAttribs[bufferSlot].getStride(); - } else { - src = vertices[bufferSlot].getPtr(); - stride = vertices[bufferSlot].getStride(); - } - - for (unsigned int i=0; i<valueCount; i+=4) - { - size_t ofs = i*sizeof(float); - const float w = 1.0f-u-v; - const Triangle& tri = triangle(primID); - const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount)); - const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); - const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); - const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); - - if (P) { - vfloat4::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2))); - } - if (dPdu) { - assert(dPdu); vfloat4::storeu(valid,dPdu+i,p1-p0); - assert(dPdv); vfloat4::storeu(valid,dPdv+i,p2-p0); - } - if (ddPdudu) { - assert(ddPdudu); vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero)); - assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero)); - assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero)); - } - } - } - -#endif - - namespace isa - { - TriangleMesh* createTriangleMesh(Device* device) { - return new TriangleMeshISA(device); - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h b/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h deleted file mode 100644 index eaf2e1799a..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! Triangle Mesh */ - struct TriangleMesh : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_TRIANGLE_MESH; - - /*! triangle indices */ - struct Triangle - { - uint32_t v[3]; - - /*! outputs triangle indices */ - __forceinline friend embree_ostream operator<<(embree_ostream cout, const Triangle& t) { - return cout << "Triangle { " << t.v[0] << ", " << t.v[1] << ", " << t.v[2] << " }"; - } - }; - - public: - - /*! triangle mesh construction */ - TriangleMesh (Device* device); - - /* geometry interface */ - public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void interpolate(const RTCInterpolateArguments* const args); - void addElementsToCount (GeometryCounts & counts) const; - - public: - - /*! returns number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns i'th triangle*/ - __forceinline const Triangle& triangle(size_t i) const { - return triangles[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const Vec3fa vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline const char* vertexPtr(size_t i) const { - return vertices0.getPtr(i); - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const Vec3fa vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline const char* vertexPtr(size_t i, size_t itime) const { - return vertices[itime].getPtr(i); - } - - /*! calculates the bounds of the i'th triangle */ - __forceinline BBox3fa bounds(size_t i) const - { - const Triangle& tri = triangle(i); - const Vec3fa v0 = vertex(tri.v[0]); - const Vec3fa v1 = vertex(tri.v[1]); - const Vec3fa v2 = vertex(tri.v[2]); - return BBox3fa(min(v0,v1,v2),max(v0,v1,v2)); - } - - /*! calculates the bounds of the i'th triangle at the itime'th timestep */ - __forceinline BBox3fa bounds(size_t i, size_t itime) const - { - const Triangle& tri = triangle(i); - const Vec3fa v0 = vertex(tri.v[0],itime); - const Vec3fa v1 = vertex(tri.v[1],itime); - const Vec3fa v2 = vertex(tri.v[2],itime); - return BBox3fa(min(v0,v1,v2),max(v0,v1,v2)); - } - - /*! check if the i'th primitive is valid at the itime'th timestep */ - __forceinline bool valid(size_t i, size_t itime) const { - return valid(i, make_range(itime, itime)); - } - - /*! check if the i'th primitive is valid between the specified time range */ - __forceinline bool valid(size_t i, const range<size_t>& itime_range) const - { - const Triangle& tri = triangle(i); - if (unlikely(tri.v[0] >= numVertices())) return false; - if (unlikely(tri.v[1] >= numVertices())) return false; - if (unlikely(tri.v[2] >= numVertices())) return false; - - for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) - { - if (!isvalid(vertex(tri.v[0],itime))) return false; - if (!isvalid(vertex(tri.v[1],itime))) return false; - if (!isvalid(vertex(tri.v[2],itime))) return false; - } - - return true; - } - - /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */ - __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const { - return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1)); - } - - /*! calculates the build bounds of the i'th primitive, if it's valid */ - __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const - { - const Triangle& tri = triangle(i); - if (unlikely(tri.v[0] >= numVertices())) return false; - if (unlikely(tri.v[1] >= numVertices())) return false; - if (unlikely(tri.v[2] >= numVertices())) return false; - - for (size_t t=0; t<numTimeSteps; t++) - { - const Vec3fa v0 = vertex(tri.v[0],t); - const Vec3fa v1 = vertex(tri.v[1],t); - const Vec3fa v2 = vertex(tri.v[2],t); - if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2))) - return false; - } - - if (likely(bbox)) - *bbox = bounds(i); - - return true; - } - - /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ - __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const - { - const Triangle& tri = triangle(i); - if (unlikely(tri.v[0] >= numVertices())) return false; - if (unlikely(tri.v[1] >= numVertices())) return false; - if (unlikely(tri.v[2] >= numVertices())) return false; - - assert(itime+1 < numTimeSteps); - const Vec3fa a0 = vertex(tri.v[0],itime+0); if (unlikely(!isvalid(a0))) return false; - const Vec3fa a1 = vertex(tri.v[1],itime+0); if (unlikely(!isvalid(a1))) return false; - const Vec3fa a2 = vertex(tri.v[2],itime+0); if (unlikely(!isvalid(a2))) return false; - const Vec3fa b0 = vertex(tri.v[0],itime+1); if (unlikely(!isvalid(b0))) return false; - const Vec3fa b1 = vertex(tri.v[1],itime+1); if (unlikely(!isvalid(b1))) return false; - const Vec3fa b2 = vertex(tri.v[2],itime+1); if (unlikely(!isvalid(b2))) return false; - - /* use bounds of first time step in builder */ - bbox = BBox3fa(min(a0,a1,a2),max(a0,a1,a2)); - return true; - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const { - return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments); - } - - /*! calculates the linear bounds of the i'th primitive for the specified time range */ - __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const { - if (!valid(i, timeSegmentRange(dt))) return false; - bbox = linearBounds(i, dt); - return true; - } - - /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { - return (float*) vertices0.getPtr(); - } - - /* gets version info of topology */ - unsigned int getTopologyVersion() const { - return triangles.modCounter; - } - - /* returns true if topology changed */ - bool topologyChanged(unsigned int otherVersion) const { - return triangles.isModified(otherVersion); // || numPrimitivesChanged; - } - - /* returns the projected area */ - __forceinline float projectedPrimitiveArea(const size_t i) const { - const Triangle& tri = triangle(i); - const Vec3fa v0 = vertex(tri.v[0]); - const Vec3fa v1 = vertex(tri.v[1]); - const Vec3fa v2 = vertex(tri.v[2]); - return areaProjectedTriangle(v0,v1,v2); - } - - public: - BufferView<Triangle> triangles; //!< array of triangles - BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer - vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep - vector<RawBufferView> vertexAttribs; //!< vertex attributes - }; - - namespace isa - { - struct TriangleMeshISA : public TriangleMesh - { - TriangleMeshISA (Device* device) - : TriangleMesh(device) {} - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,itime,bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfoMB pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!valid(j, timeSegmentRange(t0t1))) continue; - const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j)); - pinfo.add_primref(prim); - prims[k++] = prim; - } - return pinfo; - } - }; - } - - DECLARE_ISA_FUNCTION(TriangleMesh*, createTriangleMesh, Device*); -} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h b/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h deleted file mode 100644 index 8d11ed6986..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "accelset.h" - -namespace embree -{ - /*! User geometry with user defined intersection functions */ - struct UserGeometry : public AccelSet - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_USER_GEOMETRY; - - public: - UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1); - virtual void setMask (unsigned mask); - virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr); - virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect); - virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded); - virtual void build() {} - virtual void addElementsToCount (GeometryCounts & counts) const; - }; - - namespace isa - { - struct UserGeometryISA : public UserGeometry - { - UserGeometryISA (Device* device) - : UserGeometry(device) {} - - PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!buildBounds(j,itime,bounds)) continue; - const PrimRef prim(bounds,geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - } - - PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const - { - PrimInfoMB pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!valid(j, timeSegmentRange(t0t1))) continue; - const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j)); - pinfo.add_primref(prim); - prims[k++] = prim; - } - return pinfo; - } - }; - } - - DECLARE_ISA_FUNCTION(UserGeometry*, createUserGeometry, Device*); -} diff --git a/thirdparty/embree-aarch64/kernels/common/stack_item.h b/thirdparty/embree-aarch64/kernels/common/stack_item.h deleted file mode 100644 index 533c385365..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/stack_item.h +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -namespace embree -{ - /*! An item on the stack holds the node ID and distance of that node. */ - template<typename T> - struct __aligned(16) StackItemT - { - /*! assert that the xchg function works */ - static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed"); - - __forceinline StackItemT() {} - - __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {} - - /*! use SSE instructions to swap stack items */ - __forceinline static void xchg(StackItemT& a, StackItemT& b) - { - const vfloat4 sse_a = vfloat4::load((float*)&a); - const vfloat4 sse_b = vfloat4::load((float*)&b); - vfloat4::store(&a,sse_b); - vfloat4::store(&b,sse_a); - } - - /*! Sort 2 stack items. */ - __forceinline friend void sort(StackItemT& s1, StackItemT& s2) { - if (s2.dist < s1.dist) xchg(s2,s1); - } - - /*! Sort 3 stack items. */ - __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3) - { - if (s2.dist < s1.dist) xchg(s2,s1); - if (s3.dist < s2.dist) xchg(s3,s2); - if (s2.dist < s1.dist) xchg(s2,s1); - } - - /*! Sort 4 stack items. */ - __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4) - { - if (s2.dist < s1.dist) xchg(s2,s1); - if (s4.dist < s3.dist) xchg(s4,s3); - if (s3.dist < s1.dist) xchg(s3,s1); - if (s4.dist < s2.dist) xchg(s4,s2); - if (s3.dist < s2.dist) xchg(s3,s2); - } - - /*! use SSE instructions to swap stack items */ - __forceinline static void cmp_xchg(vint4& a, vint4& b) - { -#if defined(__AVX512VL__) - const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a)); -#else - const vboolf4 mask0(b < a); - const vboolf4 mask(shuffle<2,2,2,2>(mask0)); -#endif - const vint4 c = select(mask,b,a); - const vint4 d = select(mask,a,b); - a = c; - b = d; - } - - /*! Sort 3 stack items. */ - __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3) - { - cmp_xchg(s2,s1); - cmp_xchg(s3,s2); - cmp_xchg(s2,s1); - } - - /*! Sort 4 stack items. */ - __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4) - { - cmp_xchg(s2,s1); - cmp_xchg(s4,s3); - cmp_xchg(s3,s1); - cmp_xchg(s4,s2); - cmp_xchg(s3,s2); - } - - - /*! Sort N stack items. */ - __forceinline friend void sort(StackItemT* begin, StackItemT* end) - { - for (StackItemT* i = begin+1; i != end; ++i) - { - const vfloat4 item = vfloat4::load((float*)i); - const unsigned dist = i->dist; - StackItemT* j = i; - - while ((j != begin) && ((j-1)->dist < dist)) - { - vfloat4::store(j, vfloat4::load((float*)(j-1))); - --j; - } - - vfloat4::store(j, item); - } - } - - public: - T ptr; - unsigned dist; - }; - - /*! An item on the stack holds the node ID and active ray mask. */ - template<typename T> - struct __aligned(8) StackItemMaskT - { - T ptr; - size_t mask; - }; - - struct __aligned(8) StackItemMaskCoherent - { - size_t mask; - size_t parent; - size_t child; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/stat.cpp b/thirdparty/embree-aarch64/kernels/common/stat.cpp deleted file mode 100644 index b73c3a8c76..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/stat.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "stat.h" - -namespace embree -{ - Stat Stat::instance; - - Stat::Stat () { - } - - Stat::~Stat () - { -#ifdef EMBREE_STAT_COUNTERS - Stat::print(std::cout); -#endif - } - - void Stat::print(std::ostream& cout) - { - Counters& cntrs = instance.cntrs; - Counters::Data& data = instance.cntrs.code; - //Counters::Data& data = instance.cntrs.active; - - /* print absolute numbers */ - cout << "--------- ABSOLUTE ---------" << std::endl; - cout << " #normal_travs = " << float(data.normal.travs )*1E-6 << "M" << std::endl; - cout << " #nodes = " << float(data.normal.trav_nodes )*1E-6 << "M" << std::endl; - cout << " #nodes_xfm = " << float(data.normal.trav_xfm_nodes )*1E-6 << "M" << std::endl; - cout << " #leaves = " << float(data.normal.trav_leaves )*1E-6 << "M" << std::endl; - cout << " #prims = " << float(data.normal.trav_prims )*1E-6 << "M" << std::endl; - cout << " #prim_hits = " << float(data.normal.trav_prim_hits )*1E-6 << "M" << std::endl; - - cout << " #stack nodes = " << float(data.normal.trav_stack_nodes )*1E-6 << "M" << std::endl; - cout << " #stack pop = " << float(data.normal.trav_stack_pop )*1E-6 << "M" << std::endl; - - size_t normal_box_hits = 0; - size_t weighted_box_hits = 0; - for (size_t i=0;i<SIZE_HISTOGRAM;i++) { - normal_box_hits += data.normal.trav_hit_boxes[i]; - weighted_box_hits += data.normal.trav_hit_boxes[i]*i; - } - cout << " #hit_boxes = " << normal_box_hits << " (total) distribution: "; - float average = 0.0f; - for (size_t i=0;i<SIZE_HISTOGRAM;i++) - { - float value = 100.0f * data.normal.trav_hit_boxes[i] / normal_box_hits; - cout << "[" << i << "] " << value << " "; - average += (float)i*data.normal.trav_hit_boxes[i] / normal_box_hits; - } - cout << " average = " << average << std::endl; - for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.normal.trav_hit_boxes[i]*i / weighted_box_hits << " "; - cout << std::endl; - - if (data.shadow.travs) { - cout << " #shadow_travs = " << float(data.shadow.travs )*1E-6 << "M" << std::endl; - cout << " #nodes = " << float(data.shadow.trav_nodes )*1E-6 << "M" << std::endl; - cout << " #nodes_xfm = " << float(data.shadow.trav_xfm_nodes)*1E-6 << "M" << std::endl; - cout << " #leaves = " << float(data.shadow.trav_leaves )*1E-6 << "M" << std::endl; - cout << " #prims = " << float(data.shadow.trav_prims )*1E-6 << "M" << std::endl; - cout << " #prim_hits = " << float(data.shadow.trav_prim_hits)*1E-6 << "M" << std::endl; - - cout << " #stack nodes = " << float(data.shadow.trav_stack_nodes )*1E-6 << "M" << std::endl; - cout << " #stack pop = " << float(data.shadow.trav_stack_pop )*1E-6 << "M" << std::endl; - - size_t shadow_box_hits = 0; - size_t weighted_shadow_box_hits = 0; - - for (size_t i=0;i<SIZE_HISTOGRAM;i++) { - shadow_box_hits += data.shadow.trav_hit_boxes[i]; - weighted_shadow_box_hits += data.shadow.trav_hit_boxes[i]*i; - } - cout << " #hit_boxes = "; - for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i] / shadow_box_hits << " "; - cout << std::endl; - for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i]*i / weighted_shadow_box_hits << " "; - cout << std::endl; - } - cout << std::endl; - - /* print per traversal numbers */ - cout << "--------- PER TRAVERSAL ---------" << std::endl; - float active_normal_travs = float(cntrs.active.normal.travs )/float(cntrs.all.normal.travs ); - float active_normal_trav_nodes = float(cntrs.active.normal.trav_nodes )/float(cntrs.all.normal.trav_nodes ); - float active_normal_trav_xfm_nodes = float(cntrs.active.normal.trav_xfm_nodes )/float(cntrs.all.normal.trav_xfm_nodes ); - float active_normal_trav_leaves = float(cntrs.active.normal.trav_leaves)/float(cntrs.all.normal.trav_leaves); - float active_normal_trav_prims = float(cntrs.active.normal.trav_prims )/float(cntrs.all.normal.trav_prims ); - float active_normal_trav_prim_hits = float(cntrs.active.normal.trav_prim_hits )/float(cntrs.all.normal.trav_prim_hits ); - float active_normal_trav_stack_pop = float(cntrs.active.normal.trav_stack_pop )/float(cntrs.all.normal.trav_stack_pop ); - - cout << " #normal_travs = " << float(cntrs.code.normal.travs )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_travs << "% active" << std::endl; - cout << " #nodes = " << float(cntrs.code.normal.trav_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_nodes << "% active" << std::endl; - cout << " #node_xfm = " << float(cntrs.code.normal.trav_xfm_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_xfm_nodes << "% active" << std::endl; - cout << " #leaves = " << float(cntrs.code.normal.trav_leaves)/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_leaves << "% active" << std::endl; - cout << " #prims = " << float(cntrs.code.normal.trav_prims )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prims << "% active" << std::endl; - cout << " #prim_hits = " << float(cntrs.code.normal.trav_prim_hits )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prim_hits << "% active" << std::endl; - cout << " #stack_pop = " << float(cntrs.code.normal.trav_stack_pop )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_stack_pop << "% active" << std::endl; - - if (cntrs.all.shadow.travs) { - float active_shadow_travs = float(cntrs.active.shadow.travs )/float(cntrs.all.shadow.travs ); - float active_shadow_trav_nodes = float(cntrs.active.shadow.trav_nodes )/float(cntrs.all.shadow.trav_nodes ); - float active_shadow_trav_xfm_nodes = float(cntrs.active.shadow.trav_xfm_nodes )/float(cntrs.all.shadow.trav_xfm_nodes ); - float active_shadow_trav_leaves = float(cntrs.active.shadow.trav_leaves)/float(cntrs.all.shadow.trav_leaves); - float active_shadow_trav_prims = float(cntrs.active.shadow.trav_prims )/float(cntrs.all.shadow.trav_prims ); - float active_shadow_trav_prim_hits = float(cntrs.active.shadow.trav_prim_hits )/float(cntrs.all.shadow.trav_prim_hits ); - - cout << " #shadow_travs = " << float(cntrs.code.shadow.travs )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_travs << "% active" << std::endl; - cout << " #nodes = " << float(cntrs.code.shadow.trav_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_nodes << "% active" << std::endl; - cout << " #nodes_xfm = " << float(cntrs.code.shadow.trav_xfm_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_xfm_nodes << "% active" << std::endl; - cout << " #leaves = " << float(cntrs.code.shadow.trav_leaves)/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_leaves << "% active" << std::endl; - cout << " #prims = " << float(cntrs.code.shadow.trav_prims )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prims << "% active" << std::endl; - cout << " #prim_hits = " << float(cntrs.code.shadow.trav_prim_hits )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prim_hits << "% active" << std::endl; - - } - cout << std::endl; - - /* print user counters for performance tuning */ - cout << "--------- USER ---------" << std::endl; - for (size_t i=0; i<10; i++) - cout << "#user" << i << " = " << float(cntrs.user[i])/float(cntrs.all.normal.travs+cntrs.all.shadow.travs) << " per traversal" << std::endl; - - cout << "#user5/user3 " << 100.0f*float(cntrs.user[5])/float(cntrs.user[3]) << "%" << std::endl; - cout << "#user6/user3 " << 100.0f*float(cntrs.user[6])/float(cntrs.user[3]) << "%" << std::endl; - cout << "#user7/user3 " << 100.0f*float(cntrs.user[7])/float(cntrs.user[3]) << "%" << std::endl; - cout << std::endl; - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/stat.h b/thirdparty/embree-aarch64/kernels/common/stat.h deleted file mode 100644 index 3cda2bd014..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/stat.h +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -/* Macros to gather statistics */ -#ifdef EMBREE_STAT_COUNTERS -# define STAT(x) x -# define STAT3(s,x,y,z) \ - STAT(Stat::get().code .s+=x); \ - STAT(Stat::get().active.s+=y); \ - STAT(Stat::get().all .s+=z); -# define STAT_USER(i,x) Stat::get().user[i]+=x; -#else -# define STAT(x) -# define STAT3(s,x,y,z) -# define STAT_USER(i,x) -#endif - -namespace embree -{ - /*! Gathers ray tracing statistics. We count 1) how often a code - * location is reached, 2) how many SIMD lanes are active, 3) how - * many SIMD lanes reach the code location */ - class Stat - { - public: - - static const size_t SIZE_HISTOGRAM = 64+1; - - /*! constructs stat counter class */ - Stat (); - - /*! destructs stat counter class */ - ~Stat (); - - class Counters - { - public: - Counters () { - clear(); - } - - void clear() - { - all.clear(); - active.clear(); - code.clear(); - for (auto& u : user) u.store(0); - } - - public: - - /* per packet and per ray stastics */ - struct Data - { - void clear () { - normal.clear(); - shadow.clear(); - point_query.clear(); - } - - /* normal and shadow ray statistics */ - struct - { - void clear() - { - travs.store(0); - trav_nodes.store(0); - trav_leaves.store(0); - trav_prims.store(0); - trav_prim_hits.store(0); - for (auto& v : trav_hit_boxes) v.store(0); - trav_stack_pop.store(0); - trav_stack_nodes.store(0); - trav_xfm_nodes.store(0); - } - - public: - std::atomic<size_t> travs; - std::atomic<size_t> trav_nodes; - std::atomic<size_t> trav_leaves; - std::atomic<size_t> trav_prims; - std::atomic<size_t> trav_prim_hits; - std::atomic<size_t> trav_hit_boxes[SIZE_HISTOGRAM+1]; - std::atomic<size_t> trav_stack_pop; - std::atomic<size_t> trav_stack_nodes; - std::atomic<size_t> trav_xfm_nodes; - - } normal, shadow, point_query; - } all, active, code; - - std::atomic<size_t> user[10]; - }; - - public: - - static __forceinline Counters& get() { - return instance.cntrs; - } - - static void clear() { - instance.cntrs.clear(); - } - - static void print(embree_ostream cout); - - private: - Counters cntrs; - - private: - static Stat instance; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/state.cpp b/thirdparty/embree-aarch64/kernels/common/state.cpp deleted file mode 100644 index 51fc9b7826..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/state.cpp +++ /dev/null @@ -1,543 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "state.h" -#include "../../common/lexers/streamfilters.h" - -namespace embree -{ - MutexSys g_printMutex; - - State::ErrorHandler State::g_errorHandler; - - State::ErrorHandler::ErrorHandler() - : thread_error(createTls()) {} - - State::ErrorHandler::~ErrorHandler() - { - Lock<MutexSys> lock(errors_mutex); - for (size_t i=0; i<thread_errors.size(); i++) - delete thread_errors[i]; - destroyTls(thread_error); - thread_errors.clear(); - } - - RTCError* State::ErrorHandler::error() - { - RTCError* stored_error = (RTCError*) getTls(thread_error); - if (stored_error) return stored_error; - - Lock<MutexSys> lock(errors_mutex); - stored_error = new RTCError(RTC_ERROR_NONE); - thread_errors.push_back(stored_error); - setTls(thread_error,stored_error); - return stored_error; - } - - State::State () - : enabled_cpu_features(getCPUFeatures()), - enabled_builder_cpu_features(enabled_cpu_features), - frequency_level(FREQUENCY_SIMD256) - { - tri_accel = "default"; - tri_builder = "default"; - tri_traverser = "default"; - - tri_accel_mb = "default"; - tri_builder_mb = "default"; - tri_traverser_mb = "default"; - - quad_accel = "default"; - quad_builder = "default"; - quad_traverser = "default"; - - quad_accel_mb = "default"; - quad_builder_mb = "default"; - quad_traverser_mb = "default"; - - line_accel = "default"; - line_builder = "default"; - line_traverser = "default"; - - line_accel_mb = "default"; - line_builder_mb = "default"; - line_traverser_mb = "default"; - - hair_accel = "default"; - hair_builder = "default"; - hair_traverser = "default"; - - hair_accel_mb = "default"; - hair_builder_mb = "default"; - hair_traverser_mb = "default"; - - object_accel = "default"; - object_builder = "default"; - object_accel_min_leaf_size = 1; - object_accel_max_leaf_size = 1; - - object_accel_mb = "default"; - object_builder_mb = "default"; - object_accel_mb_min_leaf_size = 1; - object_accel_mb_max_leaf_size = 1; - - max_spatial_split_replications = 1.2f; - useSpatialPreSplits = false; - - tessellation_cache_size = 128*1024*1024; - - subdiv_accel = "default"; - subdiv_accel_mb = "default"; - - grid_accel = "default"; - grid_builder = "default"; - grid_accel_mb = "default"; - grid_builder_mb = "default"; - - instancing_open_min = 0; - instancing_block_size = 0; - instancing_open_factor = 8.0f; - instancing_open_max_depth = 32; - instancing_open_max = 50000000; - - ignore_config_files = false; - float_exceptions = false; - quality_flags = -1; - scene_flags = -1; - verbose = 0; - benchmark = 0; - - numThreads = 0; - numUserThreads = 0; - -#if TASKING_INTERNAL - set_affinity = true; -#else - set_affinity = false; -#endif - /* per default enable affinity on KNL */ - if (hasISA(AVX512KNL)) set_affinity = true; - - start_threads = false; - enable_selockmemoryprivilege = false; -#if defined(__LINUX__) - hugepages = true; -#else - hugepages = false; -#endif - hugepages_success = true; - - alloc_main_block_size = 0; - alloc_num_main_slots = 0; - alloc_thread_block_size = 0; - alloc_single_thread_alloc = -1; - - error_function = nullptr; - error_function_userptr = nullptr; - - memory_monitor_function = nullptr; - memory_monitor_userptr = nullptr; - } - - State::~State() { - } - - bool State::hasISA(const int isa) { - return (enabled_cpu_features & isa) == isa; - } - - bool State::checkISASupport() { -#if defined(__ARM_NEON) - /* - * NEON CPU type is a mixture of NEON and SSE2 - */ - - bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2; - - /* this will be true when explicitly initialize Device with `isa=neon` config */ - bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON; - - return hasSSE2 || hasNEON; -#else - return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features; -#endif - } - - void State::verify() - { - /* verify that calculations stay in range */ - assert(rcp(min_rcp_input)*FLT_LARGE+FLT_LARGE < 0.01f*FLT_MAX); - - /* here we verify that CPP files compiled for a specific ISA only - * call that same or lower ISA version of non-inlined class member - * functions */ -#if defined(DEBUG) -#if defined(EMBREE_TARGET_SSE2) -#if !defined(__ARM_NEON) - assert(sse2::getISA() <= SSE2); -#endif -#endif -#if defined(EMBREE_TARGET_SSE42) - assert(sse42::getISA() <= SSE42); -#endif -#if defined(EMBREE_TARGET_AVX) - assert(avx::getISA() <= AVX); -#endif -#if defined(EMBREE_TARGET_AVX2) - assert(avx2::getISA() <= AVX2); -#endif -#if defined (EMBREE_TARGET_AVX512KNL) - assert(avx512knl::getISA() <= AVX512KNL); -#endif -#if defined (EMBREE_TARGET_AVX512SKX) - assert(avx512skx::getISA() <= AVX512SKX); -#endif -#endif - } - - const char* symbols[3] = { "=", ",", "|" }; - - bool State::parseFile(const FileName& fileName) - { - FILE* f = fopen(fileName.c_str(),"r"); - if (!f) return false; - Ref<Stream<int> > file = new FileStream(f,fileName); - - std::vector<std::string> syms; - for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++) - syms.push_back(symbols[i]); - - Ref<TokenStream> cin = new TokenStream(new LineCommentFilter(file,"#"), - TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.", - TokenStream::separators,syms); - parse(cin); - return true; - } - - void State::parseString(const char* cfg) - { - if (cfg == nullptr) return; - - std::vector<std::string> syms; - for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++) - syms.push_back(symbols[i]); - - Ref<TokenStream> cin = new TokenStream(new StrStream(cfg), - TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.", - TokenStream::separators,syms); - parse(cin); - } - - int string_to_cpufeatures(const std::string& isa) - { - if (isa == "sse" ) return SSE; - else if (isa == "sse2") return SSE2; - else if (isa == "sse3") return SSE3; - else if (isa == "ssse3") return SSSE3; - else if (isa == "sse41") return SSE41; - else if (isa == "sse4.1") return SSE41; - else if (isa == "sse42") return SSE42; - else if (isa == "sse4.2") return SSE42; - else if (isa == "avx") return AVX; - else if (isa == "avxi") return AVXI; - else if (isa == "avx2") return AVX2; - else if (isa == "avx512knl") return AVX512KNL; - else if (isa == "avx512skx") return AVX512SKX; - else return SSE2; - } - - void State::parse(Ref<TokenStream> cin) - { - /* parse until end of stream */ - while (cin->peek() != Token::Eof()) - { - const Token tok = cin->get(); - - if (tok == Token::Id("threads") && cin->trySymbol("=")) - numThreads = cin->get().Int(); - - else if (tok == Token::Id("user_threads")&& cin->trySymbol("=")) - numUserThreads = cin->get().Int(); - - else if (tok == Token::Id("set_affinity")&& cin->trySymbol("=")) - set_affinity = cin->get().Int(); - - else if (tok == Token::Id("affinity")&& cin->trySymbol("=")) - set_affinity = cin->get().Int(); - - else if (tok == Token::Id("start_threads")&& cin->trySymbol("=")) - start_threads = cin->get().Int(); - - else if (tok == Token::Id("isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_cpu_features = string_to_cpufeatures(isa); - enabled_builder_cpu_features = enabled_cpu_features; - } - - else if (tok == Token::Id("max_isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_cpu_features &= string_to_cpufeatures(isa); - enabled_builder_cpu_features &= enabled_cpu_features; - } - - else if (tok == Token::Id("max_builder_isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_builder_cpu_features &= string_to_cpufeatures(isa); - } - - else if (tok == Token::Id("frequency_level") && cin->trySymbol("=")) { - std::string freq = cin->get().Identifier(); - if (freq == "simd128") frequency_level = FREQUENCY_SIMD128; - else if (freq == "simd256") frequency_level = FREQUENCY_SIMD256; - else if (freq == "simd512") frequency_level = FREQUENCY_SIMD512; - } - - else if (tok == Token::Id("enable_selockmemoryprivilege") && cin->trySymbol("=")) { - enable_selockmemoryprivilege = cin->get().Int(); - } - else if (tok == Token::Id("hugepages") && cin->trySymbol("=")) { - hugepages = cin->get().Int(); - } - - else if (tok == Token::Id("ignore_config_files") && cin->trySymbol("=")) - ignore_config_files = cin->get().Int(); - else if (tok == Token::Id("float_exceptions") && cin->trySymbol("=")) - float_exceptions = cin->get().Int(); - - else if ((tok == Token::Id("tri_accel") || tok == Token::Id("accel")) && cin->trySymbol("=")) - tri_accel = cin->get().Identifier(); - else if ((tok == Token::Id("tri_builder") || tok == Token::Id("builder")) && cin->trySymbol("=")) - tri_builder = cin->get().Identifier(); - else if ((tok == Token::Id("tri_traverser") || tok == Token::Id("traverser")) && cin->trySymbol("=")) - tri_traverser = cin->get().Identifier(); - - else if ((tok == Token::Id("tri_accel_mb") || tok == Token::Id("accel_mb")) && cin->trySymbol("=")) - tri_accel_mb = cin->get().Identifier(); - else if ((tok == Token::Id("tri_builder_mb") || tok == Token::Id("builder_mb")) && cin->trySymbol("=")) - tri_builder_mb = cin->get().Identifier(); - else if ((tok == Token::Id("tri_traverser_mb") || tok == Token::Id("traverser_mb")) && cin->trySymbol("=")) - tri_traverser_mb = cin->get().Identifier(); - - else if ((tok == Token::Id("quad_accel")) && cin->trySymbol("=")) - quad_accel = cin->get().Identifier(); - else if ((tok == Token::Id("quad_builder")) && cin->trySymbol("=")) - quad_builder = cin->get().Identifier(); - else if ((tok == Token::Id("quad_traverser")) && cin->trySymbol("=")) - quad_traverser = cin->get().Identifier(); - - else if ((tok == Token::Id("quad_accel_mb")) && cin->trySymbol("=")) - quad_accel_mb = cin->get().Identifier(); - else if ((tok == Token::Id("quad_builder_mb")) && cin->trySymbol("=")) - quad_builder_mb = cin->get().Identifier(); - else if ((tok == Token::Id("quad_traverser_mb")) && cin->trySymbol("=")) - quad_traverser_mb = cin->get().Identifier(); - - else if ((tok == Token::Id("line_accel")) && cin->trySymbol("=")) - line_accel = cin->get().Identifier(); - else if ((tok == Token::Id("line_builder")) && cin->trySymbol("=")) - line_builder = cin->get().Identifier(); - else if ((tok == Token::Id("line_traverser")) && cin->trySymbol("=")) - line_traverser = cin->get().Identifier(); - - else if ((tok == Token::Id("line_accel_mb")) && cin->trySymbol("=")) - line_accel_mb = cin->get().Identifier(); - else if ((tok == Token::Id("line_builder_mb")) && cin->trySymbol("=")) - line_builder_mb = cin->get().Identifier(); - else if ((tok == Token::Id("line_traverser_mb")) && cin->trySymbol("=")) - line_traverser_mb = cin->get().Identifier(); - - else if (tok == Token::Id("hair_accel") && cin->trySymbol("=")) - hair_accel = cin->get().Identifier(); - else if (tok == Token::Id("hair_builder") && cin->trySymbol("=")) - hair_builder = cin->get().Identifier(); - else if (tok == Token::Id("hair_traverser") && cin->trySymbol("=")) - hair_traverser = cin->get().Identifier(); - - else if (tok == Token::Id("hair_accel_mb") && cin->trySymbol("=")) - hair_accel_mb = cin->get().Identifier(); - else if (tok == Token::Id("hair_builder_mb") && cin->trySymbol("=")) - hair_builder_mb = cin->get().Identifier(); - else if (tok == Token::Id("hair_traverser_mb") && cin->trySymbol("=")) - hair_traverser_mb = cin->get().Identifier(); - - else if (tok == Token::Id("object_accel") && cin->trySymbol("=")) - object_accel = cin->get().Identifier(); - else if (tok == Token::Id("object_builder") && cin->trySymbol("=")) - object_builder = cin->get().Identifier(); - else if (tok == Token::Id("object_accel_min_leaf_size") && cin->trySymbol("=")) - object_accel_min_leaf_size = cin->get().Int(); - else if (tok == Token::Id("object_accel_max_leaf_size") && cin->trySymbol("=")) - object_accel_max_leaf_size = cin->get().Int(); - - else if (tok == Token::Id("object_accel_mb") && cin->trySymbol("=")) - object_accel_mb = cin->get().Identifier(); - else if (tok == Token::Id("object_builder_mb") && cin->trySymbol("=")) - object_builder_mb = cin->get().Identifier(); - else if (tok == Token::Id("object_accel_mb_min_leaf_size") && cin->trySymbol("=")) - object_accel_mb_min_leaf_size = cin->get().Int(); - else if (tok == Token::Id("object_accel_mb_max_leaf_size") && cin->trySymbol("=")) - object_accel_mb_max_leaf_size = cin->get().Int(); - - else if (tok == Token::Id("instancing_open_min") && cin->trySymbol("=")) - instancing_open_min = cin->get().Int(); - else if (tok == Token::Id("instancing_block_size") && cin->trySymbol("=")) { - instancing_block_size = cin->get().Int(); - instancing_open_factor = 0.0f; - } - else if (tok == Token::Id("instancing_open_max_depth") && cin->trySymbol("=")) - instancing_open_max_depth = cin->get().Int(); - else if (tok == Token::Id("instancing_open_factor") && cin->trySymbol("=")) { - instancing_block_size = 0; - instancing_open_factor = cin->get().Float(); - } - else if (tok == Token::Id("instancing_open_max") && cin->trySymbol("=")) - instancing_open_max = cin->get().Int(); - - else if (tok == Token::Id("subdiv_accel") && cin->trySymbol("=")) - subdiv_accel = cin->get().Identifier(); - else if (tok == Token::Id("subdiv_accel_mb") && cin->trySymbol("=")) - subdiv_accel_mb = cin->get().Identifier(); - - else if (tok == Token::Id("grid_accel") && cin->trySymbol("=")) - grid_accel = cin->get().Identifier(); - else if (tok == Token::Id("grid_accel_mb") && cin->trySymbol("=")) - grid_accel_mb = cin->get().Identifier(); - - else if (tok == Token::Id("verbose") && cin->trySymbol("=")) - verbose = cin->get().Int(); - else if (tok == Token::Id("benchmark") && cin->trySymbol("=")) - benchmark = cin->get().Int(); - - else if (tok == Token::Id("quality")) { - if (cin->trySymbol("=")) { - Token flag = cin->get(); - if (flag == Token::Id("low")) quality_flags = RTC_BUILD_QUALITY_LOW; - else if (flag == Token::Id("medium")) quality_flags = RTC_BUILD_QUALITY_MEDIUM; - else if (flag == Token::Id("high")) quality_flags = RTC_BUILD_QUALITY_HIGH; - } - } - - else if (tok == Token::Id("scene_flags")) { - scene_flags = 0; - if (cin->trySymbol("=")) { - do { - Token flag = cin->get(); - if (flag == Token::Id("dynamic") ) scene_flags |= RTC_SCENE_FLAG_DYNAMIC; - else if (flag == Token::Id("compact")) scene_flags |= RTC_SCENE_FLAG_COMPACT; - else if (flag == Token::Id("robust")) scene_flags |= RTC_SCENE_FLAG_ROBUST; - } while (cin->trySymbol("|")); - } - } - - else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("=")) - max_spatial_split_replications = cin->get().Float(); - - else if (tok == Token::Id("presplits") && cin->trySymbol("=")) - useSpatialPreSplits = cin->get().Int() != 0 ? true : false; - - else if (tok == Token::Id("tessellation_cache_size") && cin->trySymbol("=")) - tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f); - else if (tok == Token::Id("cache_size") && cin->trySymbol("=")) - tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f); - - else if (tok == Token::Id("alloc_main_block_size") && cin->trySymbol("=")) - alloc_main_block_size = cin->get().Int(); - else if (tok == Token::Id("alloc_num_main_slots") && cin->trySymbol("=")) - alloc_num_main_slots = cin->get().Int(); - else if (tok == Token::Id("alloc_thread_block_size") && cin->trySymbol("=")) - alloc_thread_block_size = cin->get().Int(); - else if (tok == Token::Id("alloc_single_thread_alloc") && cin->trySymbol("=")) - alloc_single_thread_alloc = cin->get().Int(); - - cin->trySymbol(","); // optional , separator - } - } - - bool State::verbosity(size_t N) { - return N <= verbose; - } - - void State::print() - { - std::cout << "general:" << std::endl; - std::cout << " build threads = " << numThreads << std::endl; - std::cout << " build user threads = " << numUserThreads << std::endl; - std::cout << " start_threads = " << start_threads << std::endl; - std::cout << " affinity = " << set_affinity << std::endl; - std::cout << " frequency_level = "; - switch (frequency_level) { - case FREQUENCY_SIMD128: std::cout << "simd128" << std::endl; break; - case FREQUENCY_SIMD256: std::cout << "simd256" << std::endl; break; - case FREQUENCY_SIMD512: std::cout << "simd512" << std::endl; break; - default: std::cout << "error" << std::endl; break; - } - - std::cout << " hugepages = "; - if (!hugepages) std::cout << "disabled" << std::endl; - else if (hugepages_success) std::cout << "enabled" << std::endl; - else std::cout << "failed" << std::endl; - - std::cout << " verbosity = " << verbose << std::endl; - std::cout << " cache_size = " << float(tessellation_cache_size)*1E-6 << " MB" << std::endl; - std::cout << " max_spatial_split_replications = " << max_spatial_split_replications << std::endl; - - std::cout << "triangles:" << std::endl; - std::cout << " accel = " << tri_accel << std::endl; - std::cout << " builder = " << tri_builder << std::endl; - std::cout << " traverser = " << tri_traverser << std::endl; - - std::cout << "motion blur triangles:" << std::endl; - std::cout << " accel = " << tri_accel_mb << std::endl; - std::cout << " builder = " << tri_builder_mb << std::endl; - std::cout << " traverser = " << tri_traverser_mb << std::endl; - - std::cout << "quads:" << std::endl; - std::cout << " accel = " << quad_accel << std::endl; - std::cout << " builder = " << quad_builder << std::endl; - std::cout << " traverser = " << quad_traverser << std::endl; - - std::cout << "motion blur quads:" << std::endl; - std::cout << " accel = " << quad_accel_mb << std::endl; - std::cout << " builder = " << quad_builder_mb << std::endl; - std::cout << " traverser = " << quad_traverser_mb << std::endl; - - std::cout << "line segments:" << std::endl; - std::cout << " accel = " << line_accel << std::endl; - std::cout << " builder = " << line_builder << std::endl; - std::cout << " traverser = " << line_traverser << std::endl; - - std::cout << "motion blur line segments:" << std::endl; - std::cout << " accel = " << line_accel_mb << std::endl; - std::cout << " builder = " << line_builder_mb << std::endl; - std::cout << " traverser = " << line_traverser_mb << std::endl; - - std::cout << "hair:" << std::endl; - std::cout << " accel = " << hair_accel << std::endl; - std::cout << " builder = " << hair_builder << std::endl; - std::cout << " traverser = " << hair_traverser << std::endl; - - std::cout << "motion blur hair:" << std::endl; - std::cout << " accel = " << hair_accel_mb << std::endl; - std::cout << " builder = " << hair_builder_mb << std::endl; - std::cout << " traverser = " << hair_traverser_mb << std::endl; - - std::cout << "subdivision surfaces:" << std::endl; - std::cout << " accel = " << subdiv_accel << std::endl; - - std::cout << "grids:" << std::endl; - std::cout << " accel = " << grid_accel << std::endl; - std::cout << " builder = " << grid_builder << std::endl; - - std::cout << "motion blur grids:" << std::endl; - std::cout << " accel = " << grid_accel_mb << std::endl; - std::cout << " builder = " << grid_builder_mb << std::endl; - - std::cout << "object_accel:" << std::endl; - std::cout << " min_leaf_size = " << object_accel_min_leaf_size << std::endl; - std::cout << " max_leaf_size = " << object_accel_max_leaf_size << std::endl; - - std::cout << "object_accel_mb:" << std::endl; - std::cout << " min_leaf_size = " << object_accel_mb_min_leaf_size << std::endl; - std::cout << " max_leaf_size = " << object_accel_mb_max_leaf_size << std::endl; - } -} diff --git a/thirdparty/embree-aarch64/kernels/common/state.h b/thirdparty/embree-aarch64/kernels/common/state.h deleted file mode 100644 index d0fccc023f..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/state.h +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" - -namespace embree -{ - /* mutex to make printing to cout thread safe */ - extern MutexSys g_printMutex; - - struct State : public RefCount - { - public: - /*! state construction */ - State (); - - /*! state destruction */ - ~State(); - - /*! verifies that state is correct */ - void verify(); - - /*! parses state from a configuration file */ - bool parseFile(const FileName& fileName); - - /*! parses the state from a string */ - void parseString(const char* cfg); - - /*! parses the state from a stream */ - void parse(Ref<TokenStream> cin); - - /*! prints the state */ - void print(); - - /*! checks if verbosity level is at least N */ - bool verbosity(size_t N); - - /*! checks if some particular ISA is enabled */ - bool hasISA(const int isa); - - /*! check whether selected ISA is supported by the HW */ - bool checkISASupport(); - - public: - std::string tri_accel; //!< acceleration structure to use for triangles - std::string tri_builder; //!< builder to use for triangles - std::string tri_traverser; //!< traverser to use for triangles - - public: - std::string tri_accel_mb; //!< acceleration structure to use for motion blur triangles - std::string tri_builder_mb; //!< builder to use for motion blur triangles - std::string tri_traverser_mb; //!< traverser to use for triangles - - public: - std::string quad_accel; //!< acceleration structure to use for quads - std::string quad_builder; //!< builder to use for quads - std::string quad_traverser; //!< traverser to use for quads - - public: - std::string quad_accel_mb; //!< acceleration structure to use for motion blur quads - std::string quad_builder_mb; //!< builder to use for motion blur quads - std::string quad_traverser_mb; //!< traverser to use for motion blur quads - - public: - std::string line_accel; //!< acceleration structure to use for line segments - std::string line_builder; //!< builder to use for line segments - std::string line_traverser; //!< traverser to use for line segments - - public: - std::string line_accel_mb; //!< acceleration structure to use for motion blur line segments - std::string line_builder_mb; //!< builder to use for motion blur line segments - std::string line_traverser_mb; //!< traverser to use for motion blur line segments - - public: - std::string hair_accel; //!< hair acceleration structure to use - std::string hair_builder; //!< builder to use for hair - std::string hair_traverser; //!< traverser to use for hair - - public: - std::string hair_accel_mb; //!< acceleration structure to use for motion blur hair - std::string hair_builder_mb; //!< builder to use for motion blur hair - std::string hair_traverser_mb; //!< traverser to use for motion blur hair - - public: - std::string object_accel; //!< acceleration structure for user geometries - std::string object_builder; //!< builder for user geometries - int object_accel_min_leaf_size; //!< minimum leaf size for object acceleration structure - int object_accel_max_leaf_size; //!< maximum leaf size for object acceleration structure - - public: - std::string object_accel_mb; //!< acceleration structure for user geometries - std::string object_builder_mb; //!< builder for user geometries - int object_accel_mb_min_leaf_size; //!< minimum leaf size for mblur object acceleration structure - int object_accel_mb_max_leaf_size; //!< maximum leaf size for mblur object acceleration structure - - public: - std::string subdiv_accel; //!< acceleration structure to use for subdivision surfaces - std::string subdiv_accel_mb; //!< acceleration structure to use for subdivision surfaces - - public: - std::string grid_accel; //!< acceleration structure to use for grids - std::string grid_builder; //!< builder for grids - std::string grid_accel_mb; //!< acceleration structure to use for motion blur grids - std::string grid_builder_mb; //!< builder for motion blur grids - - public: - float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits - bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder - size_t tessellation_cache_size; //!< size of the shared tessellation cache - - public: - size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees - size_t instancing_block_size; //!< instancing opens tree up to average block size of primitives - float instancing_open_factor; //!< instancing opens tree up to x times the number of instances - size_t instancing_open_max_depth; //!< maximum open depth for geometries - size_t instancing_open_max; //!< instancing opens tree to maximally that number of subtrees - - public: - bool ignore_config_files; //!< if true no more config files get parse - bool float_exceptions; //!< enable floating point exceptions - int quality_flags; - int scene_flags; - size_t verbose; //!< verbosity of output - size_t benchmark; //!< true - - public: - size_t numThreads; //!< number of threads to use in builders - size_t numUserThreads; //!< number of user provided threads to use in builders - bool set_affinity; //!< sets affinity for worker threads - bool start_threads; //!< true when threads should be started at device creation time - int enabled_cpu_features; //!< CPU ISA features to use - int enabled_builder_cpu_features; //!< CPU ISA features to use for builders only - enum FREQUENCY_LEVEL { - FREQUENCY_SIMD128, - FREQUENCY_SIMD256, - FREQUENCY_SIMD512 - } frequency_level; //!< frequency level the app wants to run on (default is SIMD256) - bool enable_selockmemoryprivilege; //!< configures the SeLockMemoryPrivilege under Windows to enable huge pages - bool hugepages; //!< true if huge pages should get used - bool hugepages_success; //!< status for enabling huge pages - - public: - size_t alloc_main_block_size; //!< main allocation block size (shared between threads) - int alloc_num_main_slots; //!< number of such shared blocks to be used to allocate - size_t alloc_thread_block_size; //!< size of thread local allocator block size - int alloc_single_thread_alloc; //!< in single mode nodes and leaves use same thread local allocator - - public: - - /*! checks if we can use AVX */ - bool canUseAVX() { - return hasISA(AVX) && frequency_level != FREQUENCY_SIMD128; - } - - /*! checks if we can use AVX2 */ - bool canUseAVX2() { - return hasISA(AVX2) && frequency_level != FREQUENCY_SIMD128; - } - - struct ErrorHandler - { - public: - ErrorHandler(); - ~ErrorHandler(); - RTCError* error(); - - public: - tls_t thread_error; - std::vector<RTCError*> thread_errors; - MutexSys errors_mutex; - }; - ErrorHandler errorHandler; - static ErrorHandler g_errorHandler; - - public: - void setErrorFunction(RTCErrorFunction fptr, void* uptr) - { - error_function = fptr; - error_function_userptr = uptr; - } - - RTCErrorFunction error_function; - void* error_function_userptr; - - public: - void setMemoryMonitorFunction(RTCMemoryMonitorFunction fptr, void* uptr) - { - memory_monitor_function = fptr; - memory_monitor_userptr = uptr; - } - - RTCMemoryMonitorFunction memory_monitor_function; - void* memory_monitor_userptr; - }; -} diff --git a/thirdparty/embree-aarch64/kernels/common/vector.h b/thirdparty/embree-aarch64/kernels/common/vector.h deleted file mode 100644 index b478762240..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/vector.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "default.h" - -namespace embree -{ - /*! invokes the memory monitor callback */ - struct MemoryMonitorInterface { - virtual void memoryMonitor(ssize_t bytes, bool post) = 0; - }; - - /*! allocator that performs aligned monitored allocations */ - template<typename T, size_t alignment = 64> - struct aligned_monitored_allocator - { - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - __forceinline aligned_monitored_allocator(MemoryMonitorInterface* device) - : device(device), hugepages(false) {} - - __forceinline pointer allocate( size_type n ) - { - if (n) { - assert(device); - device->memoryMonitor(n*sizeof(T),false); - } - if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M) - { - pointer p = (pointer) os_malloc(n*sizeof(value_type),hugepages); - assert(p); - return p; - } - return (pointer) alignedMalloc(n*sizeof(value_type),alignment); - } - - __forceinline void deallocate( pointer p, size_type n ) - { - if (p) - { - if (n*sizeof(value_type) >= 14 * PAGE_SIZE_2M) - os_free(p,n*sizeof(value_type),hugepages); - else - alignedFree(p); - } - else assert(n == 0); - - if (n) { - assert(device); - device->memoryMonitor(-ssize_t(n)*sizeof(T),true); - } - } - - __forceinline void construct( pointer p, const_reference val ) { - new (p) T(val); - } - - __forceinline void destroy( pointer p ) { - p->~T(); - } - - private: - MemoryMonitorInterface* device; - bool hugepages; - }; - - /*! monitored vector */ - template<typename T> - using mvector = vector_t<T,aligned_monitored_allocator<T,std::alignment_of<T>::value> >; -} |