diff options
Diffstat (limited to 'thirdparty/embree/kernels/common')
-rw-r--r-- | thirdparty/embree/kernels/common/accel.h | 4 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/acceln.cpp | 6 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/accelset.h | 23 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/alloc.cpp | 3 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/alloc.h | 72 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/device.cpp | 4 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/geometry.h | 4 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/isa.h | 2 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/ray.h | 2 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/rtcore.cpp | 26 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/rtcore.h | 141 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/rtcore_builder.cpp | 2 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/scene.cpp | 4 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/scene_curves.h | 8 | ||||
-rw-r--r-- | thirdparty/embree/kernels/common/state.cpp | 15 |
15 files changed, 194 insertions, 122 deletions
diff --git a/thirdparty/embree/kernels/common/accel.h b/thirdparty/embree/kernels/common/accel.h index cc4ea1805b..d24326ce92 100644 --- a/thirdparty/embree/kernels/common/accel.h +++ b/thirdparty/embree/kernels/common/accel.h @@ -332,7 +332,7 @@ namespace embree intersectorN.intersect(this,rayN,N,context); } -#if defined(__SSE__) +#if defined(__SSE__) || defined(__ARM_NEON) __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) { const vint<4> mask = valid.mask32(); intersect4(&mask,(RTCRayHit4&)ray,context); @@ -388,7 +388,7 @@ namespace embree intersectorN.occluded(this,rayN,N,context); } -#if defined(__SSE__) +#if defined(__SSE__) || defined(__ARM_NEON) __forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) { const vint<4> mask = valid.mask32(); occluded4(&mask,(RTCRay4&)ray,context); diff --git a/thirdparty/embree/kernels/common/acceln.cpp b/thirdparty/embree/kernels/common/acceln.cpp index 32a27c560a..111c62083d 100644 --- a/thirdparty/embree/kernels/common/acceln.cpp +++ b/thirdparty/embree/kernels/common/acceln.cpp @@ -97,7 +97,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded4(valid,ray,context); -#if defined(__SSE2__) +#if defined(__SSE2__) || defined(__ARM_NEON) vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); if (unlikely(none(valid0 & hit0))) break; @@ -111,7 +111,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded8(valid,ray,context); -#if defined(__SSE2__) // FIXME: use higher ISA +#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); vbool4 valid1 = asBool(((vint4*)valid)[1]); @@ -127,7 +127,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded16(valid,ray,context); -#if defined(__SSE2__) // FIXME: use higher ISA +#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); vbool4 valid1 = asBool(((vint4*)valid)[1]); diff --git a/thirdparty/embree/kernels/common/accelset.h b/thirdparty/embree/kernels/common/accelset.h index 90b184a07b..1b67120c97 100644 --- a/thirdparty/embree/kernels/common/accelset.h +++ b/thirdparty/embree/kernels/common/accelset.h @@ -14,21 +14,14 @@ namespace embree struct IntersectFunctionNArguments; struct OccludedFunctionNArguments; - typedef void (*ReportIntersectionFunc) (IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args); - typedef void (*ReportOcclusionFunc) (OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args); - struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments { - IntersectContext* internal_context; Geometry* geometry; - ReportIntersectionFunc report; }; struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments { - IntersectContext* internal_context; Geometry* geometry; - ReportOcclusionFunc report; }; /*! Base class for set of acceleration structures. */ @@ -145,7 +138,7 @@ namespace embree public: /*! Intersects a single ray with the scene. */ - __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report) + __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context) { assert(primID < size()); assert(intersectorN.intersect); @@ -159,15 +152,13 @@ namespace embree args.N = 1; args.geomID = geomID; args.primID = primID; - args.internal_context = context; args.geometry = this; - args.report = report; intersectorN.intersect(&args); } /*! Tests if single ray is occluded by the scene. */ - __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report) + __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context) { assert(primID < size()); assert(intersectorN.occluded); @@ -181,16 +172,14 @@ namespace embree args.N = 1; args.geomID = geomID; args.primID = primID; - args.internal_context = context; args.geometry = this; - args.report = report; intersectorN.occluded(&args); } /*! Intersects a packet of K rays with the scene. */ template<int K> - __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report) + __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context) { assert(primID < size()); assert(intersectorN.intersect); @@ -204,16 +193,14 @@ namespace embree args.N = K; args.geomID = geomID; args.primID = primID; - args.internal_context = context; args.geometry = this; - args.report = report; intersectorN.intersect(&args); } /*! Tests if a packet of K rays is occluded by the scene. */ template<int K> - __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report) + __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context) { assert(primID < size()); assert(intersectorN.occluded); @@ -227,9 +214,7 @@ namespace embree args.N = K; args.geomID = geomID; args.primID = primID; - args.internal_context = context; args.geometry = this; - args.report = report; intersectorN.occluded(&args); } diff --git a/thirdparty/embree/kernels/common/alloc.cpp b/thirdparty/embree/kernels/common/alloc.cpp index 1a0e1aeed3..38a76225f4 100644 --- a/thirdparty/embree/kernels/common/alloc.cpp +++ b/thirdparty/embree/kernels/common/alloc.cpp @@ -3,6 +3,9 @@ #include "alloc.h" #include "../../common/sys/thread.h" +#if defined(APPLE) && defined(__aarch64__) +#include "../../common/sys/barrier.h" +#endif namespace embree { diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h index 4458e35c24..12769df2c8 100644 --- a/thirdparty/embree/kernels/common/alloc.h +++ b/thirdparty/embree/kernels/common/alloc.h @@ -8,6 +8,10 @@ #include "scene.h" #include "primref.h" +#if defined(APPLE) && defined(__aarch64__) +#include <mutex> +#endif + namespace embree { class FastAllocator @@ -26,7 +30,7 @@ namespace embree public: struct ThreadLocal2; - enum AllocationType { ALIGNED_MALLOC, OS_MALLOC, SHARED, ANY_TYPE }; + enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE }; /*! Per thread structure holding the current memory block. */ struct __aligned(64) ThreadLocal @@ -132,7 +136,11 @@ namespace embree { assert(alloc_i); if (alloc.load() == alloc_i) return; +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(mutex); +#else Lock<SpinLock> lock(mutex); +#endif //if (alloc.load() == alloc_i) return; // not required as only one thread calls bind if (alloc.load()) { alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); @@ -150,7 +158,11 @@ namespace embree { assert(alloc_i); if (alloc.load() != alloc_i) return; +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(mutex); +#else Lock<SpinLock> lock(mutex); +#endif if (alloc.load() != alloc_i) return; // required as a different thread calls unbind alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes(); @@ -161,7 +173,11 @@ namespace embree } public: +#if defined(APPLE) && defined(__aarch64__) + std::mutex mutex; +#else SpinLock mutex; //!< required as unbind is called from other threads +#endif std::atomic<FastAllocator*> alloc; //!< parent allocator ThreadLocal alloc0; ThreadLocal alloc1; @@ -169,7 +185,7 @@ namespace embree FastAllocator (Device* device, bool osAllocation) : device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0), - growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? OS_MALLOC : ALIGNED_MALLOC), + growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC), primrefarray(device,0) { for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) @@ -206,7 +222,7 @@ namespace embree void setOSallocation(bool flag) { - atype = flag ? OS_MALLOC : ALIGNED_MALLOC; + atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC; } private: @@ -217,7 +233,11 @@ namespace embree ThreadLocal2* alloc = thread_local_allocator2; if (alloc == nullptr) { thread_local_allocator2 = alloc = new ThreadLocal2; +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(s_thread_local_allocators_lock); +#else Lock<SpinLock> lock(s_thread_local_allocators_lock); +#endif s_thread_local_allocators.push_back(make_unique(alloc)); } return alloc; @@ -227,7 +247,11 @@ namespace embree __forceinline void join(ThreadLocal2* alloc) { +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(s_thread_local_allocators_lock); +#else Lock<SpinLock> lock(thread_local_allocators_lock); +#endif thread_local_allocators.push_back(alloc); } @@ -492,7 +516,11 @@ namespace embree /* parallel block creation in case of no freeBlocks, avoids single global mutex */ if (likely(freeBlocks.load() == nullptr)) { +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(slotMutex[slot]); +#else Lock<SpinLock> lock(slotMutex[slot]); +#endif if (myUsedBlocks == threadUsedBlocks[slot]) { const size_t alignedBytes = (bytes+(align-1)) & ~(align-1); const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes); @@ -505,7 +533,11 @@ namespace embree /* if this fails allocate new block */ { - Lock<SpinLock> lock(mutex); +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(mutex); +#else + Lock<SpinLock> lock(mutex); +#endif if (myUsedBlocks == threadUsedBlocks[slot]) { if (freeBlocks.load() != nullptr) { @@ -527,7 +559,11 @@ namespace embree /*! add new block */ void addBlock(void* ptr, ssize_t bytes) { +#if defined(APPLE) && defined(__aarch64__) + std::scoped_lock lock(mutex); +#else Lock<SpinLock> lock(mutex); +#endif const size_t sizeof_Header = offsetof(Block,data[0]); void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1)); size_t ofs = (size_t) aptr - (size_t) ptr; @@ -613,8 +649,8 @@ namespace embree bytesWasted(alloc->bytesWasted), stat_all(alloc,ANY_TYPE), stat_malloc(alloc,ALIGNED_MALLOC), - stat_4K(alloc,OS_MALLOC,false), - stat_2M(alloc,OS_MALLOC,true), + stat_4K(alloc,EMBREE_OS_MALLOC,false), + stat_2M(alloc,EMBREE_OS_MALLOC,true), stat_shared(alloc,SHARED) {} AllStatistics (size_t bytesUsed, @@ -707,7 +743,7 @@ namespace embree /* We avoid using os_malloc for small blocks as this could * cause a risk of fragmenting the virtual address space and * reach the limit of vm.max_map_count = 65k under Linux. */ - if (atype == OS_MALLOC && bytesAllocate < maxAllocationSize) + if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize) atype = ALIGNED_MALLOC; /* we need to additionally allocate some header */ @@ -716,7 +752,7 @@ namespace embree bytesReserve = sizeof_Header+bytesReserve; /* consume full 4k pages with using os_malloc */ - if (atype == OS_MALLOC) { + if (atype == EMBREE_OS_MALLOC) { bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1)); bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1)); } @@ -748,11 +784,11 @@ namespace embree return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment); } } - else if (atype == OS_MALLOC) + else if (atype == EMBREE_OS_MALLOC) { if (device) device->memoryMonitor(bytesAllocate,false); bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages); - return new (ptr) Block(OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages); + return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages); } else assert(false); @@ -796,7 +832,7 @@ namespace embree if (device) device->memoryMonitor(-sizeof_Alloced,true); } - else if (atype == OS_MALLOC) { + else if (atype == EMBREE_OS_MALLOC) { size_t sizeof_This = sizeof_Header+reserveEnd; os_free(this,sizeof_This,huge_pages); if (device) device->memoryMonitor(-sizeof_Alloced,true); @@ -857,7 +893,7 @@ namespace embree bool hasType(AllocationType atype_i, bool huge_pages_i) const { if (atype_i == ANY_TYPE ) return true; - else if (atype == OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages; + else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages; else return atype_i == atype; } @@ -906,7 +942,7 @@ namespace embree void print_block() const { if (atype == ALIGNED_MALLOC) std::cout << "A"; - else if (atype == OS_MALLOC) std::cout << "O"; + else if (atype == EMBREE_OS_MALLOC) std::cout << "O"; else if (atype == SHARED) std::cout << "S"; if (huge_pages) std::cout << "H"; size_t bytesUsed = getBlockUsedBytes(); @@ -936,7 +972,11 @@ namespace embree std::atomic<Block*> freeBlocks; std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS]; - SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; +#if defined(APPLE) && defined(__aarch64__) + std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; +#else + PaddedSpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; +#endif bool use_single_mode; size_t defaultBlockSize; @@ -950,7 +990,11 @@ namespace embree static __thread ThreadLocal2* thread_local_allocator2; static SpinLock s_thread_local_allocators_lock; static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators; +#if defined(APPLE) && defined(__aarch64__) + std::mutex thread_local_allocators_lock; +#else SpinLock thread_local_allocators_lock; +#endif std::vector<ThreadLocal2*> thread_local_allocators; AllocationType atype; mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes diff --git a/thirdparty/embree/kernels/common/device.cpp b/thirdparty/embree/kernels/common/device.cpp index 068e0c2983..833ec65139 100644 --- a/thirdparty/embree/kernels/common/device.cpp +++ b/thirdparty/embree/kernels/common/device.cpp @@ -66,7 +66,11 @@ namespace embree case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break; case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break; case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break; +#if defined(__APPLE__) + case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4 +#else case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break; +#endif } /* initialize global state */ diff --git a/thirdparty/embree/kernels/common/geometry.h b/thirdparty/embree/kernels/common/geometry.h index 2f9f2e7c94..593990f5b1 100644 --- a/thirdparty/embree/kernels/common/geometry.h +++ b/thirdparty/embree/kernels/common/geometry.h @@ -91,7 +91,7 @@ namespace embree size_t numFilterFunctions; //!< number of geometries with filter functions enabled size_t numTriangles; //!< number of enabled triangles - size_t numMBTriangles; //!< number of enabled motion blured triangles + size_t numMBTriangles; //!< number of enabled motion blurred triangles size_t numQuads; //!< number of enabled quads size_t numMBQuads; //!< number of enabled motion blurred quads size_t numBezierCurves; //!< number of enabled curves @@ -99,7 +99,7 @@ namespace embree size_t numLineSegments; //!< number of enabled line segments size_t numMBLineSegments; //!< number of enabled line motion blurred segments size_t numSubdivPatches; //!< number of enabled subdivision patches - size_t numMBSubdivPatches; //!< number of enabled motion blured subdivision patches + size_t numMBSubdivPatches; //!< number of enabled motion blurred subdivision patches size_t numUserGeometries; //!< number of enabled user geometries size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries size_t numInstancesCheap; //!< number of enabled cheap instances diff --git a/thirdparty/embree/kernels/common/isa.h b/thirdparty/embree/kernels/common/isa.h index ae6556336c..9e1132e1a0 100644 --- a/thirdparty/embree/kernels/common/isa.h +++ b/thirdparty/embree/kernels/common/isa.h @@ -44,7 +44,7 @@ namespace embree #define SELECT_SYMBOL_DEFAULT(features,intersector) \ intersector = isa::intersector; -#if defined(__SSE__) +#if defined(__SSE__) || defined(__ARM_NEON) #if !defined(EMBREE_TARGET_SIMD4) #define EMBREE_TARGET_SIMD4 #endif diff --git a/thirdparty/embree/kernels/common/ray.h b/thirdparty/embree/kernels/common/ray.h index 7b951cc1e8..3c8ee3989c 100644 --- a/thirdparty/embree/kernels/common/ray.h +++ b/thirdparty/embree/kernels/common/ray.h @@ -6,7 +6,7 @@ #include "default.h" #include "instance_stack.h" -// FIXME: if ray gets seperated into ray* and hit, uload4 needs to be adjusted +// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted namespace embree { diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp index 94b3819e42..a6ea55bfc4 100644 --- a/thirdparty/embree/kernels/common/rtcore.cpp +++ b/thirdparty/embree/kernels/common/rtcore.cpp @@ -7,6 +7,7 @@ #include "device.h" #include "scene.h" #include "context.h" +#include "../geometry/filter.h" #include "../../include/embree3/rtcore_ray.h" using namespace embree; @@ -482,7 +483,7 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray4* ray4 = (Ray4*) rayhit; + RayHit4* ray4 = (RayHit4*) rayhit; for (size_t i=0; i<4; i++) { if (!valid[i]) continue; RayHit ray1; ray4->get(i,ray1); @@ -513,7 +514,7 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray8* ray8 = (Ray8*) rayhit; + RayHit8* ray8 = (RayHit8*) rayhit; for (size_t i=0; i<8; i++) { if (!valid[i]) continue; RayHit ray1; ray8->get(i,ray1); @@ -546,7 +547,7 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray16* ray16 = (Ray16*) rayhit; + RayHit16* ray16 = (RayHit16*) rayhit; for (size_t i=0; i<16; i++) { if (!valid[i]) continue; RayHit ray1; ray16->get(i,ray1); @@ -1097,13 +1098,13 @@ RTC_NAMESPACE_BEGIN; RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args) { IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i; - args->report(args,filter_args); + isa::reportIntersection1(args, filter_args); } RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args) { OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i; - args->report(args,filter_args); + isa::reportOcclusion1(args,filter_args); } RTC_API RTCGeometry rtcNewGeometry (RTCDevice hdevice, RTCGeometryType type) @@ -1763,4 +1764,19 @@ RTC_NAMESPACE_BEGIN; return nullptr; } + RTC_API RTCGeometry rtcGetGeometryThreadSafe (RTCScene hscene, unsigned int geomID) + { + Scene* scene = (Scene*) hscene; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetGeometryThreadSafe); +#if defined(DEBUG) + RTC_VERIFY_HANDLE(hscene); + RTC_VERIFY_GEOMID(geomID); +#endif + Ref<Geometry> geom = scene->get_locked(geomID); + return (RTCGeometry) geom.ptr; + RTC_CATCH_END2(scene); + return nullptr; + } + RTC_NAMESPACE_END diff --git a/thirdparty/embree/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h index f8aad7c7cb..ac58a84d6f 100644 --- a/thirdparty/embree/kernels/common/rtcore.h +++ b/thirdparty/embree/kernels/common/rtcore.h @@ -26,56 +26,59 @@ namespace embree /*! Macros used in the rtcore API implementation */ // -- GODOT start -- -// #define RTC_CATCH_BEGIN try { #define RTC_CATCH_BEGIN - -// #define RTC_CATCH_END(device) \ -// } catch (std::bad_alloc&) { \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// } catch (rtcore_error& e) { \ -// Device::process_error(device,e.error,e.what()); \ -// } catch (std::exception& e) { \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// } catch (...) { \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// } #define RTC_CATCH_END(device) - -// #define RTC_CATCH_END2(scene) \ -// } catch (std::bad_alloc&) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// } catch (rtcore_error& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,e.error,e.what()); \ -// } catch (std::exception& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// } catch (...) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// } #define RTC_CATCH_END2(scene) - -// #define RTC_CATCH_END2_FALSE(scene) \ -// } catch (std::bad_alloc&) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ -// return false; \ -// } catch (rtcore_error& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,e.error,e.what()); \ -// return false; \ -// } catch (std::exception& e) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ -// return false; \ -// } catch (...) { \ -// Device* device = scene ? scene->device : nullptr; \ -// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ -// return false; \ -// } #define RTC_CATCH_END2_FALSE(scene) return false; + +#if 0 +#define RTC_CATCH_BEGIN try { + +#define RTC_CATCH_END(device) \ + } catch (std::bad_alloc&) { \ + Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ + } catch (rtcore_error& e) { \ + Device::process_error(device,e.error,e.what()); \ + } catch (std::exception& e) { \ + Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ + } catch (...) { \ + Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ + } + +#define RTC_CATCH_END2(scene) \ + } catch (std::bad_alloc&) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ + } catch (rtcore_error& e) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,e.error,e.what()); \ + } catch (std::exception& e) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ + } catch (...) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ + } + +#define RTC_CATCH_END2_FALSE(scene) \ + } catch (std::bad_alloc&) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \ + return false; \ + } catch (rtcore_error& e) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,e.error,e.what()); \ + return false; \ + } catch (std::exception& e) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \ + return false; \ + } catch (...) { \ + Device* device = scene ? scene->device : nullptr; \ + Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \ + return false; \ + } +#endif // -- GODOT end -- #define RTC_VERIFY_HANDLE(handle) \ @@ -103,39 +106,35 @@ namespace embree #define RTC_TRACE(x) #endif -// -- GODOT begin -- -// /*! used to throw embree API errors */ -// struct rtcore_error : public std::exception -// { -// __forceinline rtcore_error(RTCError error, const std::string& str) -// : error(error), str(str) {} -// -// ~rtcore_error() throw() {} -// -// const char* what () const throw () { -// return str.c_str(); -// } -// -// RTCError error; -// std::string str; -// }; -// -- GODOT end -- +// -- GODOT start -- +#if 0 + /*! used to throw embree API errors */ + struct rtcore_error : public std::exception + { + __forceinline rtcore_error(RTCError error, const std::string& str) + : error(error), str(str) {} + + ~rtcore_error() throw() {} + + const char* what () const throw () { + return str.c_str(); + } + + RTCError error; + std::string str; + }; +#endif #if defined(DEBUG) // only report file and line in debug mode - // -- GODOT begin -- - // #define throw_RTCError(error,str) \ - // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define throw_RTCError(error,str) \ printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); - // -- GODOT end -- + // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #else - // -- GODOT begin -- - // #define throw_RTCError(error,str) \ - // throw rtcore_error(error,str); #define throw_RTCError(error,str) \ abort(); - // -- GODOT end -- + // throw rtcore_error(error,str); #endif +// -- GODOT end -- #define RTC_BUILD_ARGUMENTS_HAS(settings,member) \ (settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member))) diff --git a/thirdparty/embree/kernels/common/rtcore_builder.cpp b/thirdparty/embree/kernels/common/rtcore_builder.cpp index 1f1b6f6ddf..29e3bdca20 100644 --- a/thirdparty/embree/kernels/common/rtcore_builder.cpp +++ b/thirdparty/embree/kernels/common/rtcore_builder.cpp @@ -371,7 +371,7 @@ RTC_NAMESPACE_BEGIN bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa)); bvh->allocator.reset(); - /* switch between differnet builders based on quality level */ + /* switch between different builders based on quality level */ if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW) return rtcBuildBVHMorton(arguments); else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM) diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp index 408d7eae6f..65d31d0f81 100644 --- a/thirdparty/embree/kernels/common/scene.cpp +++ b/thirdparty/embree/kernels/common/scene.cpp @@ -629,9 +629,7 @@ namespace embree if (geometry == null) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry"); - if (geometry->isEnabled()) { - setModified (); - } + setModified (); accels_deleteGeometry(unsigned(geomID)); id_pool.deallocate((unsigned)geomID); geometries[geomID] = null; diff --git a/thirdparty/embree/kernels/common/scene_curves.h b/thirdparty/embree/kernels/common/scene_curves.h index a5a39e42d4..a1ea45d3c7 100644 --- a/thirdparty/embree/kernels/common/scene_curves.h +++ b/thirdparty/embree/kernels/common/scene_curves.h @@ -452,6 +452,10 @@ namespace embree const Vec3fa n1 = normal(index+1,itime); if (!isvalid(n0) || !isvalid(n1)) return false; + + const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds(); + if (!isvalid(b)) + return false; } } @@ -612,6 +616,10 @@ namespace embree const Vec3fa dn1 = dnormal(index+1,itime); if (!isvalid(dn0) || !isvalid(dn1)) return false; + + const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds(); + if (!isvalid(b)) + return false; } } diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp index 01c862da0c..db6b803041 100644 --- a/thirdparty/embree/kernels/common/state.cpp +++ b/thirdparty/embree/kernels/common/state.cpp @@ -144,7 +144,20 @@ namespace embree } bool State::checkISASupport() { +#if defined(__ARM_NEON) + /* + * NEON CPU type is a mixture of NEON and SSE2 + */ + + bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2; + + /* this will be true when explicitly initialize Device with `isa=neon` config */ + bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON; + + return hasSSE2 || hasNEON; +#else return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features; +#endif } void State::verify() @@ -157,8 +170,10 @@ namespace embree * functions */ #if defined(DEBUG) #if defined(EMBREE_TARGET_SSE2) +#if !defined(__ARM_NEON) assert(sse2::getISA() <= SSE2); #endif +#endif #if defined(EMBREE_TARGET_SSE42) assert(sse42::getISA() <= SSE42); #endif |