Diffstat (limited to 'thirdparty/embree/kernels/common/alloc.h')
-rw-r--r--  thirdparty/embree/kernels/common/alloc.h | 72
1 file changed, 58 insertions(+), 14 deletions(-)
diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h
index 4458e35c24..12769df2c8 100644
--- a/thirdparty/embree/kernels/common/alloc.h
+++ b/thirdparty/embree/kernels/common/alloc.h
@@ -8,6 +8,10 @@
#include "scene.h"
#include "primref.h"
+#if defined(APPLE) && defined(__aarch64__)
+#include <mutex>
+#endif
+
namespace embree
{
class FastAllocator
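The hunk above pulls in <mutex> only for Apple arm64 builds, where the rest of the patch swaps embree's userspace SpinLock for std::mutex: a spinlock that loses the race on Apple Silicon burns cycles without telling the scheduler anything, while std::mutex deschedules the waiter. A minimal sketch of the same selection pattern, keyed off the compiler-predefined __APPLE__ macro (the patch as recorded tests APPLE instead; FallbackSpinLock here is hypothetical, not embree's SpinLock):

    #include <atomic>
    #if defined(__APPLE__) && defined(__aarch64__)
    #include <mutex>
    #endif

    // Toy spinlock standing in for embree's SpinLock.
    struct FallbackSpinLock {
      std::atomic_flag flag = ATOMIC_FLAG_INIT;
      void lock()   { while (flag.test_and_set(std::memory_order_acquire)) { /* spin */ } }
      void unlock() { flag.clear(std::memory_order_release); }
    };

    #if defined(__APPLE__) && defined(__aarch64__)
    using LockType = std::mutex;        // kernel-assisted: contended waiters sleep
    #else
    using LockType = FallbackSpinLock;  // busy-waits: fine for very short critical sections
    #endif
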
@@ -26,7 +30,7 @@ namespace embree
public:
struct ThreadLocal2;
- enum AllocationType { ALIGNED_MALLOC, OS_MALLOC, SHARED, ANY_TYPE };
+ enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE };
/*! Per thread structure holding the current memory block. */
struct __aligned(64) ThreadLocal
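The enumerator rename (OS_MALLOC to EMBREE_OS_MALLOC) is presumably defensive namespacing: an unscoped enumerator is a plain identifier, so a like-named macro anywhere in the include chain would be substituted into the enum. A hypothetical illustration of that failure mode:

    // If some unrelated header did this...
    #define OS_MALLOC do_os_malloc(0)
    // ...then the old declaration
    //   enum AllocationType { ALIGNED_MALLOC, OS_MALLOC, SHARED, ANY_TYPE };
    // would expand to 'do_os_malloc(0)' inside the enum and fail to compile.
    enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE };  // the prefix sidesteps it
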
@@ -132,7 +136,11 @@ namespace embree
{
assert(alloc_i);
if (alloc.load() == alloc_i) return;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
//if (alloc.load() == alloc_i) return; // not required as only one thread calls bind
if (alloc.load()) {
alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
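Both branches of the new #ifdef express the same RAII idea: acquire in the constructor, release in the destructor, so an early return can never leak the lock. A generic guard in the spirit of embree's Lock<SpinLock> (a sketch; the real template differs) could look like:

    // Works with any type exposing lock()/unlock(), std::mutex included.
    template <typename MutexT>
    struct ScopedGuard {
      MutexT& m;
      explicit ScopedGuard(MutexT& m_) : m(m_) { m.lock(); }
      ~ScopedGuard() { m.unlock(); }
      ScopedGuard(const ScopedGuard&) = delete;
      ScopedGuard& operator=(const ScopedGuard&) = delete;
    };
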
@@ -150,7 +158,11 @@ namespace embree
{
assert(alloc_i);
if (alloc.load() != alloc_i) return;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
if (alloc.load() != alloc_i) return; // required as a different thread calls unbind
alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes();
alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes();
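Note the asymmetry spelled out by the two comments: bind() may skip the re-test because only one thread ever binds, but unbind() must re-check after acquiring the lock, since a different thread can unbind between the cheap unlocked test and the lock acquisition. A simplified, self-contained sketch of that check, lock, re-check shape (names are illustrative, not embree's):

    #include <atomic>
    #include <mutex>

    struct Parent { /* stands in for FastAllocator */ };

    std::atomic<Parent*> bound{nullptr};
    std::mutex guard;

    void unbind(Parent* expected) {
      if (bound.load() != expected) return;   // cheap early-out, no lock taken
      std::scoped_lock lock(guard);
      if (bound.load() != expected) return;   // re-check: someone may have unbound
                                              // between the test and the lock
      /* ...flush per-thread statistics into *expected... */
      bound.store(nullptr);
    }
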
@@ -161,7 +173,11 @@ namespace embree
}
public:
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex mutex;
+#else
SpinLock mutex; //!< required as unbind is called from other threads
+#endif
std::atomic<FastAllocator*> alloc; //!< parent allocator
ThreadLocal alloc0;
ThreadLocal alloc1;
@@ -169,7 +185,7 @@ namespace embree
FastAllocator (Device* device, bool osAllocation)
: device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0),
- growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? OS_MALLOC : ALIGNED_MALLOC),
+ growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC),
primrefarray(device,0)
{
for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
@@ -206,7 +222,7 @@ namespace embree
void setOSallocation(bool flag)
{
- atype = flag ? OS_MALLOC : ALIGNED_MALLOC;
+ atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC;
}
private:
@@ -217,7 +233,11 @@ namespace embree
ThreadLocal2* alloc = thread_local_allocator2;
if (alloc == nullptr) {
thread_local_allocator2 = alloc = new ThreadLocal2;
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(s_thread_local_allocators_lock);
+#else
Lock<SpinLock> lock(s_thread_local_allocators_lock);
+#endif
s_thread_local_allocators.push_back(make_unique(alloc));
}
return alloc;
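getThreadLocal2() is the usual lazy thread-local registration pattern: the fast path is a plain thread-local pointer read, and only the one-time registration into the shared list takes the lock. The same shape in standard C++ (a sketch, not embree's code):

    #include <memory>
    #include <mutex>
    #include <vector>

    struct ThreadScratch { /* per-thread allocator state */ };

    static thread_local ThreadScratch* tls_scratch = nullptr;
    static std::mutex registry_lock;
    static std::vector<std::unique_ptr<ThreadScratch>> registry;

    ThreadScratch* getScratch() {
      if (tls_scratch == nullptr) {            // first use on this thread
        auto owned = std::make_unique<ThreadScratch>();
        tls_scratch = owned.get();
        std::scoped_lock lock(registry_lock);  // only the shared registry needs the lock
        registry.push_back(std::move(owned));
      }
      return tls_scratch;                      // fast path: no lock at all
    }
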
@@ -227,7 +247,11 @@ namespace embree
__forceinline void join(ThreadLocal2* alloc)
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(s_thread_local_allocators_lock);
+#else
Lock<SpinLock> lock(thread_local_allocators_lock);
+#endif
thread_local_allocators.push_back(alloc);
}
@@ -492,7 +516,11 @@ namespace embree
/* parallel block creation in case of no freeBlocks, avoids single global mutex */
if (likely(freeBlocks.load() == nullptr))
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(slotMutex[slot]);
+#else
Lock<SpinLock> lock(slotMutex[slot]);
+#endif
if (myUsedBlocks == threadUsedBlocks[slot]) {
const size_t alignedBytes = (bytes+(align-1)) & ~(align-1);
const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes);
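The alignedBytes computation is the standard power-of-two round-up: adding align-1 and masking off the low bits rounds bytes up to the next multiple of align. The same formula reappears further down for rounding allocations to PAGE_SIZE. A few compile-time spot checks:

    #include <cstddef>

    constexpr std::size_t round_up(std::size_t bytes, std::size_t align) {
      return (bytes + (align - 1)) & ~(align - 1);   // align must be a power of two
    }

    static_assert(round_up(100, 64) == 128);   // 100 -> next multiple of 64
    static_assert(round_up(128, 64) == 128);   // already aligned: unchanged
    static_assert(round_up(1, 4096) == 4096);  // page rounding is the same trick
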
@@ -505,7 +533,11 @@ namespace embree
/* if this fails allocate new block */
{
- Lock<SpinLock> lock(mutex);
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
+ Lock<SpinLock> lock(mutex);
+#endif
if (myUsedBlocks == threadUsedBlocks[slot])
{
if (freeBlocks.load() != nullptr) {
@@ -527,7 +559,11 @@ namespace embree
/*! add new block */
void addBlock(void* ptr, ssize_t bytes)
{
+#if defined(APPLE) && defined(__aarch64__)
+ std::scoped_lock lock(mutex);
+#else
Lock<SpinLock> lock(mutex);
+#endif
const size_t sizeof_Header = offsetof(Block,data[0]);
void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1));
size_t ofs = (size_t) aptr - (size_t) ptr;
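Two details of addBlock() worth unpacking: offsetof(Block,data[0]) measures how many bytes of header precede the usable payload, and the pointer arithmetic rounds the raw allocation up to maxAlignment, with ofs recording the slack lost to that rounding. A standalone sketch of both computations (DemoBlock is a hypothetical stand-in for embree's Block):

    #include <cstddef>
    #include <cstdint>

    struct DemoBlock {
      std::size_t cur, allocEnd;   // stand-ins for the header's bookkeeping fields
      char data[1];                // payload starts here
    };

    // Header size = offset of the payload within the struct.
    constexpr std::size_t header_bytes = offsetof(DemoBlock, data);

    // Round a raw pointer up to the next 'align' boundary (power of two).
    void* align_up(void* p, std::size_t align) {
      auto u = reinterpret_cast<std::uintptr_t>(p);
      return reinterpret_cast<void*>((u + align - 1) & ~(std::uintptr_t(align) - 1));
    }
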
@@ -613,8 +649,8 @@ namespace embree
bytesWasted(alloc->bytesWasted),
stat_all(alloc,ANY_TYPE),
stat_malloc(alloc,ALIGNED_MALLOC),
- stat_4K(alloc,OS_MALLOC,false),
- stat_2M(alloc,OS_MALLOC,true),
+ stat_4K(alloc,EMBREE_OS_MALLOC,false),
+ stat_2M(alloc,EMBREE_OS_MALLOC,true),
stat_shared(alloc,SHARED) {}
AllStatistics (size_t bytesUsed,
@@ -707,7 +743,7 @@ namespace embree
/* We avoid using os_malloc for small blocks as this could
* cause a risk of fragmenting the virtual address space and
* reach the limit of vm.max_map_count = 65k under Linux. */
- if (atype == OS_MALLOC && bytesAllocate < maxAllocationSize)
+ if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize)
atype = ALIGNED_MALLOC;
/* we need to additionally allocate some header */
@@ -716,7 +752,7 @@ namespace embree
bytesReserve = sizeof_Header+bytesReserve;
/* consume full 4k pages with using os_malloc */
- if (atype == OS_MALLOC) {
+ if (atype == EMBREE_OS_MALLOC) {
bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1));
bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1));
}
@@ -748,11 +784,11 @@ namespace embree
return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment);
}
}
- else if (atype == OS_MALLOC)
+ else if (atype == EMBREE_OS_MALLOC)
{
if (device) device->memoryMonitor(bytesAllocate,false);
bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages);
- return new (ptr) Block(OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
+ return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages);
}
else
assert(false);
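Each branch of the factory obtains raw memory from a different source (alignedMalloc, os_malloc, or a shared buffer) and then builds the Block header in place with placement new; because freeing must go back through the matching allocator, the constructed Block records its AllocationType. A minimal placement-new sketch, with std::malloc standing in for the real allocators:

    #include <cstdlib>
    #include <new>

    struct Header { std::size_t usable; };

    Header* make_block(std::size_t payload) {
      void* raw = std::malloc(sizeof(Header) + payload);
      if (!raw) return nullptr;
      return new (raw) Header{payload};   // placement new: constructs, allocates nothing
    }

    void free_block(Header* h) {
      h->~Header();   // trivial here, but the general pattern needs it
      std::free(h);   // must match whichever allocator produced the memory
    }
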
@@ -796,7 +832,7 @@ namespace embree
if (device) device->memoryMonitor(-sizeof_Alloced,true);
}
- else if (atype == OS_MALLOC) {
+ else if (atype == EMBREE_OS_MALLOC) {
size_t sizeof_This = sizeof_Header+reserveEnd;
os_free(this,sizeof_This,huge_pages);
if (device) device->memoryMonitor(-sizeof_Alloced,true);
@@ -857,7 +893,7 @@ namespace embree
bool hasType(AllocationType atype_i, bool huge_pages_i) const
{
if (atype_i == ANY_TYPE ) return true;
- else if (atype == OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
+ else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages;
else return atype_i == atype;
}
@@ -906,7 +942,7 @@ namespace embree
void print_block() const
{
if (atype == ALIGNED_MALLOC) std::cout << "A";
- else if (atype == OS_MALLOC) std::cout << "O";
+ else if (atype == EMBREE_OS_MALLOC) std::cout << "O";
else if (atype == SHARED) std::cout << "S";
if (huge_pages) std::cout << "H";
size_t bytesUsed = getBlockUsedBytes();
@@ -936,7 +972,11 @@ namespace embree
std::atomic<Block*> freeBlocks;
std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS];
- SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#else
+ PaddedSpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS];
+#endif
bool use_single_mode;
size_t defaultBlockSize;
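In the non-Apple branch the per-slot locks are upgraded from SpinLock to PaddedSpinLock. The point of the padding is false sharing: packed into an array, several tiny spinlocks land in one cache line, so threads spinning on different slots still invalidate each other's line on every attempt. A hypothetical padded wrapper (embree's PaddedSpinLock may be laid out differently):

    #include <atomic>

    struct alignas(64) PaddedLock {   // 64 = a typical cache-line size
      std::atomic_flag flag = ATOMIC_FLAG_INIT;
      void lock()   { while (flag.test_and_set(std::memory_order_acquire)) { /* spin */ } }
      void unlock() { flag.clear(std::memory_order_release); }
    };

    static_assert(sizeof(PaddedLock) == 64);   // exactly one lock per cache line
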
@@ -950,7 +990,11 @@ namespace embree
static __thread ThreadLocal2* thread_local_allocator2;
static SpinLock s_thread_local_allocators_lock;
static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators;
+#if defined(APPLE) && defined(__aarch64__)
+ std::mutex thread_local_allocators_lock;
+#else
SpinLock thread_local_allocators_lock;
+#endif
std::vector<ThreadLocal2*> thread_local_allocators;
AllocationType atype;
mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes
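
One subtlety: in the Apple branch, getThreadLocal2() above wraps s_thread_local_allocators_lock, which this hunk leaves declared as a SpinLock, in std::scoped_lock. That compiles because std::scoped_lock over a single mutex only requires the BasicLockable interface (lock()/unlock()), not std::mutex itself. A self-contained illustration with a toy spinlock:

    #include <atomic>
    #include <mutex>

    struct TinySpinLock {
      std::atomic_flag f = ATOMIC_FLAG_INIT;
      void lock()   { while (f.test_and_set(std::memory_order_acquire)) { /* spin */ } }
      void unlock() { f.clear(std::memory_order_release); }
    };

    TinySpinLock spin;
    int counter = 0;

    void bump() {
      std::scoped_lock lock(spin);   // CTAD: deduces std::scoped_lock<TinySpinLock>
      ++counter;                     // protected exactly as with a std::mutex
    }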