summary refs log tree commit diff
path: root/thirdparty/embree/kernels/common
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/kernels/common')
-rw-r--r-- thirdparty/embree/kernels/common/accel.h 4
-rw-r--r-- thirdparty/embree/kernels/common/acceln.cpp 6
-rw-r--r-- thirdparty/embree/kernels/common/accelset.h 23
-rw-r--r-- thirdparty/embree/kernels/common/alloc.cpp 3
-rw-r--r-- thirdparty/embree/kernels/common/alloc.h 72
-rw-r--r-- thirdparty/embree/kernels/common/device.cpp 4
-rw-r--r-- thirdparty/embree/kernels/common/geometry.h 4
-rw-r--r-- thirdparty/embree/kernels/common/isa.h 2
-rw-r--r-- thirdparty/embree/kernels/common/ray.h 2
-rw-r--r-- thirdparty/embree/kernels/common/rtcore.cpp 26
-rw-r--r-- thirdparty/embree/kernels/common/rtcore.h 141
-rw-r--r-- thirdparty/embree/kernels/common/rtcore_builder.cpp 2
-rw-r--r-- thirdparty/embree/kernels/common/scene.cpp 4
-rw-r--r-- thirdparty/embree/kernels/common/scene_curves.h 8
-rw-r--r-- thirdparty/embree/kernels/common/state.cpp 15
15 files changed, 194 insertions, 122 deletions
diff --git a/thirdparty/embree/kernels/common/accel.h b/thirdparty/embree/kernels/common/accel.h
index cc4ea1805b..d24326ce92 100644
--- a/thirdparty/embree/kernels/common/accel.h
+++ b/thirdparty/embree/kernels/common/accel.h
@@ -332,7 +332,7 @@ namespace embree
intersectorN.intersect(this,rayN,N,context);
}
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
__forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) {
const vint<4> mask = valid.mask32();
intersect4(&mask,(RTCRayHit4&)ray,context);
@@ -388,7 +388,7 @@ namespace embree
intersectorN.occluded(this,rayN,N,context);
}
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
__forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) {
const vint<4> mask = valid.mask32();
occluded4(&mask,(RTCRay4&)ray,context);
diff --git a/thirdparty/embree/kernels/common/acceln.cpp b/thirdparty/embree/kernels/common/acceln.cpp
index 32a27c560a..111c62083d 100644
--- a/thirdparty/embree/kernels/common/acceln.cpp
+++ b/thirdparty/embree/kernels/common/acceln.cpp
@@ -97,7 +97,7 @@ namespace embree
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded4(valid,ray,context);
-#if defined(__SSE2__)
+#if defined(__SSE2__) || defined(__ARM_NEON)
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
if (unlikely(none(valid0 & hit0))) break;
@@ -111,7 +111,7 @@ namespace embree
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded8(valid,ray,context);
-#if defined(__SSE2__) // FIXME: use higher ISA
+#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
vbool4 valid1 = asBool(((vint4*)valid)[1]);
@@ -127,7 +127,7 @@ namespace embree
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded16(valid,ray,context);
-#if defined(__SSE2__) // FIXME: use higher ISA
+#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
vbool4 valid1 = asBool(((vint4*)valid)[1]);
diff --git a/thirdparty/embree/kernels/common/accelset.h b/thirdparty/embree/kernels/common/accelset.h
index 90b184a07b..1b67120c97 100644
--- a/thirdparty/embree/kernels/common/accelset.h
+++ b/thirdparty/embree/kernels/common/accelset.h
@@ -14,21 +14,14 @@ namespace embree
struct IntersectFunctionNArguments;
struct OccludedFunctionNArguments;
- typedef void (*ReportIntersectionFunc) (IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
- typedef void (*ReportOcclusionFunc) (OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args);
-
struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
{
- IntersectContext* internal_context;
Geometry* geometry;
- ReportIntersectionFunc report;
};
struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
{
- IntersectContext* internal_context;
Geometry* geometry;
- ReportOcclusionFunc report;
};
/*! Base class for set of acceleration structures. */
@@ -145,7 +138,7 @@ namespace embree
public:
/*! Intersects a single ray with the scene. */
- __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
+ __forceinline void intersect (RayHit& ray, unsigned int geomID, unsigned int primID, IntersectContext* context)
{
assert(primID < size());
assert(intersectorN.intersect);
@@ -159,15 +152,13 @@ namespace embree
args.N = 1;
args.geomID = geomID;
args.primID = primID;
- args.internal_context = context;
args.geometry = this;
- args.report = report;
intersectorN.intersect(&args);
}
/*! Tests if single ray is occluded by the scene. */
- __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
+ __forceinline void occluded (Ray& ray, unsigned int geomID, unsigned int primID, IntersectContext* context)
{
assert(primID < size());
assert(intersectorN.occluded);
@@ -181,16 +172,14 @@ namespace embree
args.N = 1;
args.geomID = geomID;
args.primID = primID;
- args.internal_context = context;
args.geometry = this;
- args.report = report;
intersectorN.occluded(&args);
}
/*! Intersects a packet of K rays with the scene. */
template<int K>
- __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportIntersectionFunc report)
+ __forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context)
{
assert(primID < size());
assert(intersectorN.intersect);
@@ -204,16 +193,14 @@ namespace embree
args.N = K;
args.geomID = geomID;
args.primID = primID;
- args.internal_context = context;
args.geometry = this;
- args.report = report;
intersectorN.intersect(&args);
}
/*! Tests if a packet of K rays is occluded by the scene. */
template<int K>
- __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context, ReportOcclusionFunc report)
+ __forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, IntersectContext* context)
{
assert(primID < size());
assert(intersectorN.occluded);
@@ -227,9 +214,7 @@ namespace embree
args.N = K;
args.geomID = geomID;
args.primID = primID;
- args.internal_context = context;
args.geometry = this;
- args.report = report;
intersectorN.occluded(&args);
}
diff --git a/thirdparty/embree/kernels/common/alloc.cpp b/thirdparty/embree/kernels/common/alloc.cpp
index 1a0e1aeed3..38a76225f4 100644
--- a/thirdparty/embree/kernels/common/alloc.cpp
+++ b/thirdparty/embree/kernels/common/alloc.cpp
@@ -3,6 +3,9 @@
#include "alloc.h"
#include "../../common/sys/thread.h"
+#if defined(APPLE) && defined(__aarch64__)
+#include "../../common/sys/barrier.h"
+#endif
namespace embree
{
diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h
index 4458e35c24..12769df2c8 100644
--- a/thirdparty/embree/kernels/common/alloc.h
+++ b/thirdparty/embree/kernels/common/alloc.h
@@ -8,6 +8,10 @@
#include "scene.h"
#include "primref.h"
+#if defined(APPLE) && defined(__aarch64__)
+#include <mutex>
+#endif
+
namespace embree
{
class FastAllocator
@@ -26,7 +30,7 @@ namespace embree
public:
struct ThreadLocal2;
- enum AllocationType { ALIGNED_MALLOC, OS_MALLOC, SHARED, ANY_TYPE };
+ enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE };
/*! Per thread structure holding the current memory block. */
struct __aligned(64) ThreadLocal
@@ -132,7 +136,11 @@ namespace embree
{
assert(alloc_i);
if (alloc.load() == alloc_i) return;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
//if (alloc.load() == alloc_i) return; // not required as only one thread calls bind
if (alloc.load()) {
alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
@@ -150,7 +158,11 @@ namespace embree
{
assert(alloc_i);
if (alloc.load() != alloc_i) return;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
if (alloc.load() != alloc_i) return; // required as a different thread calls unbind
alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
@@ -161,7 +173,11 @@ namespace embree
}
public:
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex mutex;
+#else
SpinLock mutex; //!< required as unbind is called from other threads
+#endif
std::atomic<FastAllocator*> alloc; //!< parent allocator
ThreadLocal alloc0;
ThreadLocal alloc1;
@@ -169,7 +185,7 @@ namespace embree
FastAllocator (Device* device, bool osAllocation)
: device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0),
- growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? OS_MALLOC : ALIGNED_MALLOC),
+ growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC),
primrefarray(device,0)
{
for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
@@ -206,7 +222,7 @@ namespace embree
void setOSallocation(bool flag)
{
- atype = flag ? OS_MALLOC : ALIGNED_MALLOC;
+ atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC;
}
private:
@@ -217,7 +233,11 @@ namespace embree
ThreadLocal2* alloc = thread_local_allocator2;
if (alloc == nullptr) {
thread_local_allocator2 = alloc = new ThreadLocal2;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(s_thread_local_allocators_lock);
+#else
Lock<SpinLock> lock(s_thread_local_allocators_lock);
+#endif
s_thread_local_allocators.push_back(make_unique(alloc));
}
return alloc;
@@ -227,7 +247,11 @@ namespace embree
__forceinline void join(ThreadLocal2* alloc)
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(thread_local_allocators_lock); // per-allocator member lock (same one the SpinLock branch takes); it guards thread_local_allocators, not the static s_thread_local_allocators list
+#else
Lock<SpinLock> lock(thread_local_allocators_lock);
+#endif
thread_local_allocators.push_back(alloc);
}
@@ -492,7 +516,11 @@ namespace embree
/* parallel block creation in case of no freeBlocks, avoids single global mutex */
if (likely(freeBlocks.load() == nullptr))
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(slotMutex[slot]);
+#else
Lock<SpinLock> lock(slotMutex[slot]);
+#endif
if (myUsedBlocks == threadUsedBlocks[slot]) {
const size_t alignedBytes = (bytes+(align-1)) & ~(align-1);
const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes);
@@ -505,7 +533,11 @@ namespace embree
/* if this fails allocate new block */
{
- Lock<SpinLock> lock(mutex);
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
+ Lock<SpinLock> lock(mutex);
+#endif
if (myUsedBlocks == threadUsedBlocks[slot])
{
if (freeBlocks.load() != nullptr) {
@@ -527,7 +559,11 @@ namespace embree
/*! add new block */
void addBlock(void* ptr, ssize_t bytes)
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
const size_t sizeof_Header = offsetof(Block,data[0]);
void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1));
size_t ofs = (size_t) aptr - (size_t) ptr;
@@ -613,8 +649,8 @@ namespace embree
bytesWasted(alloc->bytesWasted),
stat_all(alloc,ANY_TYPE),
stat_malloc(alloc,ALIGNED_MALLOC),
- stat_4K(alloc,OS_MALLOC,false),
- stat_2M(alloc,OS_MALLOC,true),
+ stat_4K(alloc,EMBREE_OS_MALLOC,false),
+ stat_2M(alloc,EMBREE_OS_MALLOC,true),
stat_shared(alloc,SHARED) {}
AllStatistics (size_t bytesUsed,
@@ -707,7 +743,7 @@ namespace embree
/* We avoid using os_malloc for small blocks as this could
* cause a risk of fragmenting the virtual address space and
* reach the limit of vm.max_map_count = 65k under Linux. */
- if (atype == OS_MALLOC && bytesAllocate < maxAllocationSize)
+ if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize)
atype = ALIGNED_MALLOC;
/* we need to additionally allocate some header */
@@ -716,7 +752,7 @@ namespace embree
bytesReserve = sizeof_Header+bytesReserve;
/* consume full 4k pages with using os_malloc */
- if (atype == OS_MALLOC) {
+ if (atype == EMBREE_OS_MALLOC) {
bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1));
bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1));
}
@@ -748,11 +784,11 @@ namespace embree
return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
}
}
- else if (atype == OS_MALLOC)
+ else if (atype == EMBREE_OS_MALLOC)
{
if (device) device->memoryMonitor(bytesAllocate,false);
bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages);
- return new (ptr) Block(OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
+ return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
}
else
assert(false);
@@ -796,7 +832,7 @@ namespace embree
if (device) device->memoryMonitor(-sizeof_Alloced,true);
}
- else if (atype == OS_MALLOC) {
+ else if (atype == EMBREE_OS_MALLOC) {
size_t sizeof_This = sizeof_Header+reserveEnd;
os_free(this,sizeof_This,huge_pages);
if (device) device->memoryMonitor(-sizeof_Alloced,true);
@@ -857,7 +893,7 @@ namespace embree
bool hasType(AllocationType atype_i, bool huge_pages_i) const
{
if (atype_i == ANY_TYPE ) return true;
- else if (atype == OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
+ else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
else return atype_i == atype;
}
@@ -906,7 +942,7 @@ namespace embree
void print_block() const
{
if (atype == ALIGNED_MALLOC) std::cout << "A";
- else if (atype == OS_MALLOC) std::cout << "O";
+ else if (atype == EMBREE_OS_MALLOC) std::cout << "O";
else if (atype == SHARED) std::cout << "S";
if (huge_pages) std::cout << "H";
size_t bytesUsed = getBlockUsedBytes();
@@ -936,7 +972,11 @@ namespace embree
std::atomic<Block*> freeBlocks;
std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
- SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#else
+ PaddedSpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#endif
bool use_single_mode;
size_t defaultBlockSize;
@@ -950,7 +990,11 @@ namespace embree
static __thread ThreadLocal2* thread_local_allocator2;
static SpinLock s_thread_local_allocators_lock;
static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators;
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex thread_local_allocators_lock;
+#else
SpinLock thread_local_allocators_lock;
+#endif
std::vector<ThreadLocal2*> thread_local_allocators;
AllocationType atype;
mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes
diff --git a/thirdparty/embree/kernels/common/device.cpp b/thirdparty/embree/kernels/common/device.cpp
index 068e0c2983..833ec65139 100644
--- a/thirdparty/embree/kernels/common/device.cpp
+++ b/thirdparty/embree/kernels/common/device.cpp
@@ -66,7 +66,11 @@ namespace embree
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
+#if defined(__APPLE__)
+ case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break; // Apple M1 supports high throughput for SIMD4
+#else
case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break;
+#endif
}
/* initialize global state */
diff --git a/thirdparty/embree/kernels/common/geometry.h b/thirdparty/embree/kernels/common/geometry.h
index 2f9f2e7c94..593990f5b1 100644
--- a/thirdparty/embree/kernels/common/geometry.h
+++ b/thirdparty/embree/kernels/common/geometry.h
@@ -91,7 +91,7 @@ namespace embree
size_t numFilterFunctions; //!< number of geometries with filter functions enabled
size_t numTriangles; //!< number of enabled triangles
- size_t numMBTriangles; //!< number of enabled motion blured triangles
+ size_t numMBTriangles; //!< number of enabled motion blurred triangles
size_t numQuads; //!< number of enabled quads
size_t numMBQuads; //!< number of enabled motion blurred quads
size_t numBezierCurves; //!< number of enabled curves
@@ -99,7 +99,7 @@ namespace embree
size_t numLineSegments; //!< number of enabled line segments
size_t numMBLineSegments; //!< number of enabled line motion blurred segments
size_t numSubdivPatches; //!< number of enabled subdivision patches
- size_t numMBSubdivPatches; //!< number of enabled motion blured subdivision patches
+ size_t numMBSubdivPatches; //!< number of enabled motion blurred subdivision patches
size_t numUserGeometries; //!< number of enabled user geometries
size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
size_t numInstancesCheap; //!< number of enabled cheap instances
diff --git a/thirdparty/embree/kernels/common/isa.h b/thirdparty/embree/kernels/common/isa.h
index ae6556336c..9e1132e1a0 100644
--- a/thirdparty/embree/kernels/common/isa.h
+++ b/thirdparty/embree/kernels/common/isa.h
@@ -44,7 +44,7 @@ namespace embree
#define SELECT_SYMBOL_DEFAULT(features,intersector) \
intersector = isa::intersector;
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
#if !defined(EMBREE_TARGET_SIMD4)
#define EMBREE_TARGET_SIMD4
#endif
diff --git a/thirdparty/embree/kernels/common/ray.h b/thirdparty/embree/kernels/common/ray.h
index 7b951cc1e8..3c8ee3989c 100644
--- a/thirdparty/embree/kernels/common/ray.h
+++ b/thirdparty/embree/kernels/common/ray.h
@@ -6,7 +6,7 @@
#include "default.h"
#include "instance_stack.h"
-// FIXME: if ray gets seperated into ray* and hit, uload4 needs to be adjusted
+// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
namespace embree
{
diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp
index 94b3819e42..a6ea55bfc4 100644
--- a/thirdparty/embree/kernels/common/rtcore.cpp
+++ b/thirdparty/embree/kernels/common/rtcore.cpp
@@ -7,6 +7,7 @@
#include "device.h"
#include "scene.h"
#include "context.h"
+#include "../geometry/filter.h"
#include "../../include/embree3/rtcore_ray.h"
using namespace embree;
@@ -482,7 +483,7 @@ RTC_NAMESPACE_BEGIN;
IntersectContext context(scene,user_context);
#if !defined(EMBREE_RAY_PACKETS)
- Ray4* ray4 = (Ray4*) rayhit;
+ RayHit4* ray4 = (RayHit4*) rayhit;
for (size_t i=0; i<4; i++) {
if (!valid[i]) continue;
RayHit ray1; ray4->get(i,ray1);
@@ -513,7 +514,7 @@ RTC_NAMESPACE_BEGIN;
IntersectContext context(scene,user_context);
#if !defined(EMBREE_RAY_PACKETS)
- Ray8* ray8 = (Ray8*) rayhit;
+ RayHit8* ray8 = (RayHit8*) rayhit;
for (size_t i=0; i<8; i++) {
if (!valid[i]) continue;
RayHit ray1; ray8->get(i,ray1);
@@ -546,7 +547,7 @@ RTC_NAMESPACE_BEGIN;
IntersectContext context(scene,user_context);
#if !defined(EMBREE_RAY_PACKETS)
- Ray16* ray16 = (Ray16*) rayhit;
+ RayHit16* ray16 = (RayHit16*) rayhit;
for (size_t i=0; i<16; i++) {
if (!valid[i]) continue;
RayHit ray1; ray16->get(i,ray1);
@@ -1097,13 +1098,13 @@ RTC_NAMESPACE_BEGIN;
RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
{
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
- args->report(args,filter_args);
+ isa::reportIntersection1(args, filter_args);
}
RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
{
OccludedFunctionNArguments* args = (OccludedFunctionNArguments*) args_i;
- args->report(args,filter_args);
+ isa::reportOcclusion1(args,filter_args);
}
RTC_API RTCGeometry rtcNewGeometry (RTCDevice hdevice, RTCGeometryType type)
@@ -1763,4 +1764,19 @@ RTC_NAMESPACE_BEGIN;
return nullptr;
}
+ RTC_API RTCGeometry rtcGetGeometryThreadSafe (RTCScene hscene, unsigned int geomID)
+ {
+ Scene* scene = (Scene*) hscene;
+ RTC_CATCH_BEGIN;
+ RTC_TRACE(rtcGetGeometryThreadSafe);
+#if defined(DEBUG)
+ RTC_VERIFY_HANDLE(hscene);
+ RTC_VERIFY_GEOMID(geomID);
+#endif
+ Ref<Geometry> geom = scene->get_locked(geomID);
+ return (RTCGeometry) geom.ptr;
+ RTC_CATCH_END2(scene);
+ return nullptr;
+ }
+
RTC_NAMESPACE_END
diff --git a/thirdparty/embree/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h
index f8aad7c7cb..ac58a84d6f 100644
--- a/thirdparty/embree/kernels/common/rtcore.h
+++ b/thirdparty/embree/kernels/common/rtcore.h
@@ -26,56 +26,59 @@ namespace embree
/*! Macros used in the rtcore API implementation */
// -- GODOT start --
-// #define RTC_CATCH_BEGIN try {
#define RTC_CATCH_BEGIN
-
-// #define RTC_CATCH_END(device) \
-// } catch (std::bad_alloc&) { \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
#define RTC_CATCH_END(device)
-
-// #define RTC_CATCH_END2(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// }
#define RTC_CATCH_END2(scene)
-
-// #define RTC_CATCH_END2_FALSE(scene) \
-// } catch (std::bad_alloc&) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
-// return false; \
-// } catch (rtcore_error& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,e.error,e.what()); \
-// return false; \
-// } catch (std::exception& e) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
-// return false; \
-// } catch (...) { \
-// Device* device = scene ? scene->device : nullptr; \
-// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
-// return false; \
-// }
#define RTC_CATCH_END2_FALSE(scene) return false;
+
+#if 0
+#define RTC_CATCH_BEGIN try {
+
+#define RTC_CATCH_END(device) \
+ } catch (std::bad_alloc&) { \
+ Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
+ } catch (rtcore_error& e) { \
+ Device::process_error(device,e.error,e.what()); \
+ } catch (std::exception& e) { \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
+ } catch (...) { \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
+ }
+
+#define RTC_CATCH_END2(scene) \
+ } catch (std::bad_alloc&) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
+ } catch (rtcore_error& e) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,e.error,e.what()); \
+ } catch (std::exception& e) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
+ } catch (...) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
+ }
+
+#define RTC_CATCH_END2_FALSE(scene) \
+ } catch (std::bad_alloc&) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
+ return false; \
+ } catch (rtcore_error& e) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,e.error,e.what()); \
+ return false; \
+ } catch (std::exception& e) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
+ return false; \
+ } catch (...) { \
+ Device* device = scene ? scene->device : nullptr; \
+ Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
+ return false; \
+ }
+#endif
// -- GODOT end --
#define RTC_VERIFY_HANDLE(handle) \
@@ -103,39 +106,35 @@ namespace embree
#define RTC_TRACE(x)
#endif
-// -- GODOT begin --
-// /*! used to throw embree API errors */
-// struct rtcore_error : public std::exception
-// {
-// __forceinline rtcore_error(RTCError error, const std::string& str)
-// : error(error), str(str) {}
-//
-// ~rtcore_error() throw() {}
-//
-// const char* what () const throw () {
-// return str.c_str();
-// }
-//
-// RTCError error;
-// std::string str;
-// };
-// -- GODOT end --
+// -- GODOT start --
+#if 0
+ /*! used to throw embree API errors */
+ struct rtcore_error : public std::exception
+ {
+ __forceinline rtcore_error(RTCError error, const std::string& str)
+ : error(error), str(str) {}
+
+ ~rtcore_error() throw() {}
+
+ const char* what () const throw () {
+ return str.c_str();
+ }
+
+ RTCError error;
+ std::string str;
+ };
+#endif
#if defined(DEBUG) // only report file and line in debug mode
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#define throw_RTCError(error,str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
- // -- GODOT end --
+ // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
- // -- GODOT begin --
- // #define throw_RTCError(error,str) \
- // throw rtcore_error(error,str);
#define throw_RTCError(error,str) \
abort();
- // -- GODOT end --
+ // throw rtcore_error(error,str);
#endif
+// -- GODOT end --
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
(settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member)))
diff --git a/thirdparty/embree/kernels/common/rtcore_builder.cpp b/thirdparty/embree/kernels/common/rtcore_builder.cpp
index 1f1b6f6ddf..29e3bdca20 100644
--- a/thirdparty/embree/kernels/common/rtcore_builder.cpp
+++ b/thirdparty/embree/kernels/common/rtcore_builder.cpp
@@ -371,7 +371,7 @@ RTC_NAMESPACE_BEGIN
bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
bvh->allocator.reset();
- /* switch between differnet builders based on quality level */
+ /* switch between different builders based on quality level */
if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
return rtcBuildBVHMorton(arguments);
else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp
index 408d7eae6f..65d31d0f81 100644
--- a/thirdparty/embree/kernels/common/scene.cpp
+++ b/thirdparty/embree/kernels/common/scene.cpp
@@ -629,9 +629,7 @@ namespace embree
if (geometry == null)
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry");
- if (geometry->isEnabled()) {
- setModified ();
- }
+ setModified ();
accels_deleteGeometry(unsigned(geomID));
id_pool.deallocate((unsigned)geomID);
geometries[geomID] = null;
diff --git a/thirdparty/embree/kernels/common/scene_curves.h b/thirdparty/embree/kernels/common/scene_curves.h
index a5a39e42d4..a1ea45d3c7 100644
--- a/thirdparty/embree/kernels/common/scene_curves.h
+++ b/thirdparty/embree/kernels/common/scene_curves.h
@@ -452,6 +452,10 @@ namespace embree
const Vec3fa n1 = normal(index+1,itime);
if (!isvalid(n0) || !isvalid(n1))
return false;
+
+ const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
+ if (!isvalid(b))
+ return false;
}
}
@@ -612,6 +616,10 @@ namespace embree
const Vec3fa dn1 = dnormal(index+1,itime);
if (!isvalid(dn0) || !isvalid(dn1))
return false;
+
+ const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
+ if (!isvalid(b))
+ return false;
}
}
diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp
index 01c862da0c..db6b803041 100644
--- a/thirdparty/embree/kernels/common/state.cpp
+++ b/thirdparty/embree/kernels/common/state.cpp
@@ -144,7 +144,20 @@ namespace embree
}
bool State::checkISASupport() {
+#if defined(__ARM_NEON)
+ /*
+ * The NEON CPU type is a mixture of NEON and SSE2, so the ISA is accepted when either feature bit is set in both getCPUFeatures() and enabled_cpu_features.
+ */
+
+ bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2;
+
+ /* this will be true when the Device is explicitly initialized with the `isa=neon` config */
+ bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON;
+
+ return hasSSE2 || hasNEON;
+#else
return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features;
+#endif
}
void State::verify()
@@ -157,8 +170,10 @@ namespace embree
* functions */
#if defined(DEBUG)
#if defined(EMBREE_TARGET_SSE2)
+#if !defined(__ARM_NEON)
assert(sse2::getISA() <= SSE2);
#endif
+#endif
#if defined(EMBREE_TARGET_SSE42)
assert(sse42::getISA() <= SSE42);
#endif