diff options
Diffstat (limited to 'thirdparty/embree-aarch64/common/sys')
26 files changed, 0 insertions, 4551 deletions
diff --git a/thirdparty/embree-aarch64/common/sys/alloc.cpp b/thirdparty/embree-aarch64/common/sys/alloc.cpp deleted file mode 100644 index 12f143f131..0000000000 --- a/thirdparty/embree-aarch64/common/sys/alloc.cpp +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "alloc.h" -#include "intrinsics.h" -#include "sysinfo.h" -#include "mutex.h" - -//////////////////////////////////////////////////////////////////////////////// -/// All Platforms -//////////////////////////////////////////////////////////////////////////////// - -namespace embree -{ - void* alignedMalloc(size_t size, size_t align) - { - if (size == 0) - return nullptr; - - assert((align & (align-1)) == 0); - void* ptr = _mm_malloc(size,align); - - if (size != 0 && ptr == nullptr) - // -- GODOT start -- - // throw std::bad_alloc(); - abort(); - // -- GODOT end -- - - return ptr; - } - - void alignedFree(void* ptr) - { - if (ptr) - _mm_free(ptr); - } - - static bool huge_pages_enabled = false; - static MutexSys os_init_mutex; - - __forceinline bool isHugePageCandidate(const size_t bytes) - { - if (!huge_pages_enabled) - return false; - - /* use huge pages only when memory overhead is low */ - const size_t hbytes = (bytes+PAGE_SIZE_2M-1) & ~size_t(PAGE_SIZE_2M-1); - return 66*(hbytes-bytes) < bytes; // at most 1.5% overhead - } -} - -//////////////////////////////////////////////////////////////////////////////// -/// Windows Platform -//////////////////////////////////////////////////////////////////////////////// - -#ifdef _WIN32 - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#include <malloc.h> - -namespace embree -{ - bool win_enable_selockmemoryprivilege (bool verbose) - { - HANDLE hToken; - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES, &hToken)) { - if (verbose) std::cout << "WARNING: OpenProcessToken failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl; - return false; - } - - TOKEN_PRIVILEGES tp; - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - if (!LookupPrivilegeValueW(nullptr, L"SeLockMemoryPrivilege", &tp.Privileges[0].Luid)) { - if (verbose) std::cout << "WARNING: LookupPrivilegeValue failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl; - return false; - } - - SetLastError(ERROR_SUCCESS); - if (!AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, 0)) { - if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed while trying to enable SeLockMemoryPrivilege" << std::endl; - return false; - } - - if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) { - if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed to enable SeLockMemoryPrivilege: Add SeLockMemoryPrivilege for current user and run process in elevated mode (Run as administrator)." << std::endl; - return false; - } - - return true; - } - - bool os_init(bool hugepages, bool verbose) - { - Lock<MutexSys> lock(os_init_mutex); - - if (!hugepages) { - huge_pages_enabled = false; - return true; - } - - if (GetLargePageMinimum() != PAGE_SIZE_2M) { - huge_pages_enabled = false; - return false; - } - - huge_pages_enabled = true; - return true; - } - - void* os_malloc(size_t bytes, bool& hugepages) - { - if (bytes == 0) { - hugepages = false; - return nullptr; - } - - /* try direct huge page allocation first */ - if (isHugePageCandidate(bytes)) - { - int flags = MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES; - char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); - if (ptr != nullptr) { - hugepages = true; - return ptr; - } - } - - /* fall back to 4k pages */ - int flags = MEM_COMMIT | MEM_RESERVE; - char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); - // -- GODOT start -- - // if (ptr == nullptr) throw std::bad_alloc(); - if (ptr == nullptr) abort(); - // -- GODOT end -- - hugepages = false; - return ptr; - } - - size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages) - { - if (hugepages) // decommitting huge pages seems not to work under Windows - return bytesOld; - - const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; - bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1); - bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1); - if (bytesNew >= bytesOld) - return bytesOld; - - if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) - // -- GODOT start -- - // throw std::bad_alloc(); - abort(); - // -- GODOT end -- - - return bytesNew; - } - - void os_free(void* ptr, size_t bytes, bool hugepages) - { - if (bytes == 0) - return; - - if (!VirtualFree(ptr,0,MEM_RELEASE)) - // -- GODOT start -- - // throw std::bad_alloc(); - abort(); - // -- GODOT end -- - } - - void os_advise(void *ptr, size_t bytes) - { - } -} - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Unix Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__UNIX__) - -#include <sys/mman.h> -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <sstream> - -#if defined(__MACOSX__) -#include <mach/vm_statistics.h> -#endif - -namespace embree -{ - bool os_init(bool hugepages, bool verbose) - { - Lock<MutexSys> lock(os_init_mutex); - - if (!hugepages) { - huge_pages_enabled = false; - return true; - } - -#if defined(__LINUX__) - - int hugepagesize = 0; - - std::ifstream file; - file.open("/proc/meminfo",std::ios::in); - if (!file.is_open()) { - if (verbose) std::cout << "WARNING: Could not open /proc/meminfo. Huge page support cannot get enabled!" << std::endl; - huge_pages_enabled = false; - return false; - } - - std::string line; - while (getline(file,line)) - { - std::stringstream sline(line); - while (!sline.eof() && sline.peek() == ' ') sline.ignore(); - std::string tag; getline(sline,tag,' '); - while (!sline.eof() && sline.peek() == ' ') sline.ignore(); - std::string val; getline(sline,val,' '); - while (!sline.eof() && sline.peek() == ' ') sline.ignore(); - std::string unit; getline(sline,unit,' '); - if (tag == "Hugepagesize:" && unit == "kB") { - hugepagesize = std::stoi(val)*1024; - break; - } - } - - if (hugepagesize != PAGE_SIZE_2M) - { - if (verbose) std::cout << "WARNING: Only 2MB huge pages supported. Huge page support cannot get enabled!" << std::endl; - huge_pages_enabled = false; - return false; - } -#endif - - huge_pages_enabled = true; - return true; - } - - void* os_malloc(size_t bytes, bool& hugepages) - { - if (bytes == 0) { - hugepages = false; - return nullptr; - } - - /* try direct huge page allocation first */ - if (isHugePageCandidate(bytes)) - { -#if defined(__MACOSX__) - void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); - if (ptr != MAP_FAILED) { - hugepages = true; - return ptr; - } -#elif defined(MAP_HUGETLB) - void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0); - if (ptr != MAP_FAILED) { - hugepages = true; - return ptr; - } -#endif - } - - /* fallback to 4k pages */ - void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - // -- GODOT start -- - // if (ptr == MAP_FAILED) throw std::bad_alloc(); - if (ptr == MAP_FAILED) abort(); - // -- GODOT end -- - hugepages = false; - - /* advise huge page hint for THP */ - os_advise(ptr,bytes); - return ptr; - } - - size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages) - { - const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; - bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1); - bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1); - if (bytesNew >= bytesOld) - return bytesOld; - - if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) - // -- GODOT start -- - // throw std::bad_alloc(); - abort(); - // -- GODOT end -- - - return bytesNew; - } - - void os_free(void* ptr, size_t bytes, bool hugepages) - { - if (bytes == 0) - return; - - /* for hugepages we need to also align the size */ - const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; - bytes = (bytes+pageSize-1) & ~(pageSize-1); - if (munmap(ptr,bytes) == -1) - // -- GODOT start -- - // throw std::bad_alloc(); - abort(); - // -- GODOT end -- - } - - /* hint for transparent huge pages (THP) */ - void os_advise(void* pptr, size_t bytes) - { -#if defined(MADV_HUGEPAGE) - madvise(pptr,bytes,MADV_HUGEPAGE); -#endif - } -} - -#endif diff --git a/thirdparty/embree-aarch64/common/sys/alloc.h b/thirdparty/embree-aarch64/common/sys/alloc.h deleted file mode 100644 index 5898ecda70..0000000000 --- a/thirdparty/embree-aarch64/common/sys/alloc.h +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" -#include <vector> -#include <set> - -namespace embree -{ -#define ALIGNED_STRUCT_(align) \ - void* operator new(size_t size) { return alignedMalloc(size,align); } \ - void operator delete(void* ptr) { alignedFree(ptr); } \ - void* operator new[](size_t size) { return alignedMalloc(size,align); } \ - void operator delete[](void* ptr) { alignedFree(ptr); } - -#define ALIGNED_CLASS_(align) \ - public: \ - ALIGNED_STRUCT_(align) \ - private: - - /*! aligned allocation */ - void* alignedMalloc(size_t size, size_t align); - void alignedFree(void* ptr); - - /*! allocator that performs aligned allocations */ - template<typename T, size_t alignment> - struct aligned_allocator - { - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - __forceinline pointer allocate( size_type n ) { - return (pointer) alignedMalloc(n*sizeof(value_type),alignment); - } - - __forceinline void deallocate( pointer p, size_type n ) { - return alignedFree(p); - } - - __forceinline void construct( pointer p, const_reference val ) { - new (p) T(val); - } - - __forceinline void destroy( pointer p ) { - p->~T(); - } - }; - - /*! allocates pages directly from OS */ - bool win_enable_selockmemoryprivilege(bool verbose); - bool os_init(bool hugepages, bool verbose); - void* os_malloc (size_t bytes, bool& hugepages); - size_t os_shrink (void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages); - void os_free (void* ptr, size_t bytes, bool hugepages); - void os_advise (void* ptr, size_t bytes); - - /*! allocator that performs OS allocations */ - template<typename T> - struct os_allocator - { - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - __forceinline os_allocator () - : hugepages(false) {} - - __forceinline pointer allocate( size_type n ) { - return (pointer) os_malloc(n*sizeof(value_type),hugepages); - } - - __forceinline void deallocate( pointer p, size_type n ) { - return os_free(p,n*sizeof(value_type),hugepages); - } - - __forceinline void construct( pointer p, const_reference val ) { - new (p) T(val); - } - - __forceinline void destroy( pointer p ) { - p->~T(); - } - - bool hugepages; - }; - - /*! allocator for IDs */ - template<typename T, size_t max_id> - struct IDPool - { - typedef T value_type; - - IDPool () - : nextID(0) {} - - T allocate() - { - /* return ID from list */ - if (!IDs.empty()) - { - T id = *IDs.begin(); - IDs.erase(IDs.begin()); - return id; - } - - /* allocate new ID */ - else - { - if (size_t(nextID)+1 > max_id) - return -1; - - return nextID++; - } - } - - /* adds an ID provided by the user */ - bool add(T id) - { - if (id > max_id) - return false; - - /* check if ID should be in IDs set */ - if (id < nextID) { - auto p = IDs.find(id); - if (p == IDs.end()) return false; - IDs.erase(p); - return true; - } - - /* otherwise increase ID set */ - else - { - for (T i=nextID; i<id; i++) { - IDs.insert(i); - } - nextID = id+1; - return true; - } - } - - void deallocate( T id ) - { - assert(id < nextID); - MAYBE_UNUSED auto done = IDs.insert(id).second; - assert(done); - } - - private: - std::set<T> IDs; //!< stores deallocated IDs to be reused - T nextID; //!< next ID to use when IDs vector is empty - }; -} - diff --git a/thirdparty/embree-aarch64/common/sys/array.h b/thirdparty/embree-aarch64/common/sys/array.h deleted file mode 100644 index 77722a39f6..0000000000 --- a/thirdparty/embree-aarch64/common/sys/array.h +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" -#include "alloc.h" - -namespace embree -{ - /*! static array with static size */ - template<typename T, size_t N> - class array_t - { - public: - - /********************** Iterators ****************************/ - - __forceinline T* begin() const { return items; }; - __forceinline T* end () const { return items+N; }; - - - /********************** Capacity ****************************/ - - __forceinline bool empty () const { return N == 0; } - __forceinline size_t size () const { return N; } - __forceinline size_t max_size () const { return N; } - - - /******************** Element access **************************/ - - __forceinline T& operator[](size_t i) { assert(i < N); return items[i]; } - __forceinline const T& operator[](size_t i) const { assert(i < N); return items[i]; } - - __forceinline T& at(size_t i) { assert(i < N); return items[i]; } - __forceinline const T& at(size_t i) const { assert(i < N); return items[i]; } - - __forceinline T& front() const { assert(N > 0); return items[0]; }; - __forceinline T& back () const { assert(N > 0); return items[N-1]; }; - - __forceinline T* data() { return items; }; - __forceinline const T* data() const { return items; }; - - private: - T items[N]; - }; - - /*! static array with dynamic size */ - template<typename T, size_t N> - class darray_t - { - public: - - __forceinline darray_t () : M(0) {} - - __forceinline darray_t (const T& v) : M(0) { - for (size_t i=0; i<N; i++) items[i] = v; - } - - /********************** Iterators ****************************/ - - __forceinline T* begin() const { return items; }; - __forceinline T* end () const { return items+M; }; - - - /********************** Capacity ****************************/ - - __forceinline bool empty () const { return M == 0; } - __forceinline size_t size () const { return M; } - __forceinline size_t capacity () const { return N; } - __forceinline size_t max_size () const { return N; } - - void resize(size_t new_size) { - assert(new_size < max_size()); - M = new_size; - } - - /******************** Modifiers **************************/ - - __forceinline void push_back(const T& v) - { - assert(M+1 < max_size()); - items[M++] = v; - } - - __forceinline void pop_back() - { - assert(!empty()); - M--; - } - - __forceinline void clear() { - M = 0; - } - - /******************** Element access **************************/ - - __forceinline T& operator[](size_t i) { assert(i < M); return items[i]; } - __forceinline const T& operator[](size_t i) const { assert(i < M); return items[i]; } - - __forceinline T& at(size_t i) { assert(i < M); return items[i]; } - __forceinline const T& at(size_t i) const { assert(i < M); return items[i]; } - - __forceinline T& front() const { assert(M > 0); return items[0]; }; - __forceinline T& back () const { assert(M > 0); return items[M-1]; }; - - __forceinline T* data() { return items; }; - __forceinline const T* data() const { return items; }; - - private: - size_t M; - T items[N]; - }; - - /*! dynamic sized array that is allocated on the stack */ -#define dynamic_large_stack_array(Ty,Name,N,max_stack_bytes) StackArray<Ty,max_stack_bytes> Name(N) - template<typename Ty, size_t max_stack_bytes> - struct __aligned(64) StackArray - { - __forceinline StackArray (const size_t N) - : N(N) - { - if (N*sizeof(Ty) <= max_stack_bytes) - data = &arr[0]; - else - data = (Ty*) alignedMalloc(N*sizeof(Ty),64); - } - - __forceinline ~StackArray () { - if (data != &arr[0]) alignedFree(data); - } - - __forceinline operator Ty* () { return data; } - __forceinline operator const Ty* () const { return data; } - - __forceinline Ty& operator[](const int i) { assert(i>=0 && i<N); return data[i]; } - __forceinline const Ty& operator[](const int i) const { assert(i>=0 && i<N); return data[i]; } - - __forceinline Ty& operator[](const unsigned i) { assert(i<N); return data[i]; } - __forceinline const Ty& operator[](const unsigned i) const { assert(i<N); return data[i]; } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline Ty& operator[](const size_t i) { assert(i<N); return data[i]; } - __forceinline const Ty& operator[](const size_t i) const { assert(i<N); return data[i]; } -#endif - - private: - Ty arr[max_stack_bytes/sizeof(Ty)]; - Ty* data; - size_t N; - - private: - StackArray (const StackArray& other) DELETED; // do not implement - StackArray& operator= (const StackArray& other) DELETED; // do not implement - - }; - - /*! dynamic sized array that is allocated on the stack */ - template<typename Ty, size_t max_stack_elements, size_t max_total_elements> - struct __aligned(64) DynamicStackArray - { - __forceinline DynamicStackArray () - : data(&arr[0]) {} - - __forceinline ~DynamicStackArray () - { - if (!isStackAllocated()) - delete[] data; - } - - __forceinline bool isStackAllocated() const { - return data == &arr[0]; - } - - __forceinline size_t size() const - { - if (isStackAllocated()) return max_stack_elements; - else return max_total_elements; - } - - __forceinline void resize(size_t M) - { - assert(M <= max_total_elements); - if (likely(M <= max_stack_elements)) return; - if (likely(!isStackAllocated())) return; - - data = new Ty[max_total_elements]; - - for (size_t i=0; i<max_stack_elements; i++) - data[i] = arr[i]; - } - - __forceinline operator Ty* () { return data; } - __forceinline operator const Ty* () const { return data; } - - __forceinline Ty& operator[](const int i) { assert(i>=0 && i<max_total_elements); resize(i+1); return data[i]; } - __forceinline Ty& operator[](const unsigned i) { assert(i<max_total_elements); resize(i+1); return data[i]; } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline Ty& operator[](const size_t i) { assert(i<max_total_elements); resize(i+1); return data[i]; } -#endif - - __forceinline DynamicStackArray (const DynamicStackArray& other) - : data(&arr[0]) - { - for (size_t i=0; i<other.size(); i++) - this->operator[] (i) = other[i]; - } - - DynamicStackArray& operator= (const DynamicStackArray& other) - { - for (size_t i=0; i<other.size(); i++) - this->operator[] (i) = other[i]; - - return *this; - } - - private: - Ty arr[max_stack_elements]; - Ty* data; - }; -} diff --git a/thirdparty/embree-aarch64/common/sys/atomic.h b/thirdparty/embree-aarch64/common/sys/atomic.h deleted file mode 100644 index ebfb8552c3..0000000000 --- a/thirdparty/embree-aarch64/common/sys/atomic.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include <atomic> -#include "intrinsics.h" - -namespace embree -{ -/* compiler memory barriers */ -#if defined(__INTEL_COMPILER) -//#define __memory_barrier() __memory_barrier() -#elif defined(__GNUC__) || defined(__clang__) -# define __memory_barrier() asm volatile("" ::: "memory") -#elif defined(_MSC_VER) -# define __memory_barrier() _ReadWriteBarrier() -#endif - - template <typename T> - struct atomic : public std::atomic<T> - { - atomic () {} - - atomic (const T& a) - : std::atomic<T>(a) {} - - atomic (const atomic<T>& a) { - this->store(a.load()); - } - - atomic& operator=(const atomic<T>& other) { - this->store(other.load()); - return *this; - } - }; - - template<typename T> - __forceinline void atomic_min(std::atomic<T>& aref, const T& bref) - { - const T b = bref.load(); - while (true) { - T a = aref.load(); - if (a <= b) break; - if (aref.compare_exchange_strong(a,b)) break; - } - } - - template<typename T> - __forceinline void atomic_max(std::atomic<T>& aref, const T& bref) - { - const T b = bref.load(); - while (true) { - T a = aref.load(); - if (a >= b) break; - if (aref.compare_exchange_strong(a,b)) break; - } - } -} diff --git a/thirdparty/embree-aarch64/common/sys/barrier.cpp b/thirdparty/embree-aarch64/common/sys/barrier.cpp deleted file mode 100644 index 0061d18db2..0000000000 --- a/thirdparty/embree-aarch64/common/sys/barrier.cpp +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "barrier.h" -#include "condition.h" -#include "regression.h" -#include "thread.h" - -#if defined (__WIN32__) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -namespace embree -{ - struct BarrierSysImplementation - { - __forceinline BarrierSysImplementation (size_t N) - : i(0), enterCount(0), exitCount(0), barrierSize(0) - { - events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr); - events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr); - init(N); - } - - __forceinline ~BarrierSysImplementation () - { - CloseHandle(events[0]); - CloseHandle(events[1]); - } - - __forceinline void init(size_t N) - { - barrierSize = N; - enterCount.store(N); - exitCount.store(N); - } - - __forceinline void wait() - { - /* every thread entering the barrier decrements this count */ - size_t i0 = i; - size_t cnt0 = enterCount--; - - /* all threads except the last one are wait in the barrier */ - if (cnt0 > 1) - { - if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0) - THROW_RUNTIME_ERROR("WaitForSingleObjects failed"); - } - - /* the last thread starts all threads waiting at the barrier */ - else - { - i = 1-i; - enterCount.store(barrierSize); - if (SetEvent(events[i0]) == 0) - THROW_RUNTIME_ERROR("SetEvent failed"); - } - - /* every thread leaving the barrier decrements this count */ - size_t cnt1 = exitCount--; - - /* the last thread that left the barrier resets the event again */ - if (cnt1 == 1) - { - exitCount.store(barrierSize); - if (ResetEvent(events[i0]) == 0) - THROW_RUNTIME_ERROR("ResetEvent failed"); - } - } - - public: - HANDLE events[2]; - atomic<size_t> i; - atomic<size_t> enterCount; - atomic<size_t> exitCount; - size_t barrierSize; - }; -} - -#else - -namespace embree -{ - struct BarrierSysImplementation - { - __forceinline BarrierSysImplementation (size_t N) - : count(0), barrierSize(0) - { - init(N); - } - - __forceinline void init(size_t N) - { - assert(count == 0); - count = 0; - barrierSize = N; - } - - __forceinline void wait() - { - mutex.lock(); - count++; - - if (count == barrierSize) { - count = 0; - cond.notify_all(); - mutex.unlock(); - return; - } - - cond.wait(mutex); - mutex.unlock(); - return; - } - - public: - MutexSys mutex; - ConditionSys cond; - volatile size_t count; - volatile size_t barrierSize; - }; -} - -#endif - -namespace embree -{ - BarrierSys::BarrierSys (size_t N) { - opaque = new BarrierSysImplementation(N); - } - - BarrierSys::~BarrierSys () { - delete (BarrierSysImplementation*) opaque; - } - - void BarrierSys::init(size_t count) { - ((BarrierSysImplementation*) opaque)->init(count); - } - - void BarrierSys::wait() { - ((BarrierSysImplementation*) opaque)->wait(); - } - - LinearBarrierActive::LinearBarrierActive (size_t N) - : count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0) - { - if (N == 0) N = getNumberOfLogicalThreads(); - init(N); - } - - LinearBarrierActive::~LinearBarrierActive() - { - delete[] count0; - delete[] count1; - } - - void LinearBarrierActive::init(size_t N) - { - if (threadCount != N) { - threadCount = N; - if (count0) delete[] count0; count0 = new unsigned char[N]; - if (count1) delete[] count1; count1 = new unsigned char[N]; - } - mode = 0; - flag0 = 0; - flag1 = 0; - for (size_t i=0; i<N; i++) count0[i] = 0; - for (size_t i=0; i<N; i++) count1[i] = 0; - } - - void LinearBarrierActive::wait (const size_t threadIndex) - { - if (mode == 0) - { - if (threadIndex == 0) - { - for (size_t i=0; i<threadCount; i++) - count1[i] = 0; - - for (size_t i=1; i<threadCount; i++) - { - while (likely(count0[i] == 0)) - pause_cpu(); - } - mode = 1; - flag1 = 0; - __memory_barrier(); - flag0 = 1; - } - else - { - count0[threadIndex] = 1; - { - while (likely(flag0 == 0)) - pause_cpu(); - } - - } - } - else - { - if (threadIndex == 0) - { - for (size_t i=0; i<threadCount; i++) - count0[i] = 0; - - for (size_t i=1; i<threadCount; i++) - { - while (likely(count1[i] == 0)) - pause_cpu(); - } - - mode = 0; - flag0 = 0; - __memory_barrier(); - flag1 = 1; - } - else - { - count1[threadIndex] = 1; - { - while (likely(flag1 == 0)) - pause_cpu(); - } - } - } - } - - struct barrier_sys_regression_test : public RegressionTest - { - BarrierSys barrier; - std::atomic<size_t> threadID; - std::atomic<size_t> numFailed; - std::vector<size_t> threadResults; - - barrier_sys_regression_test() - : RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0) - { - registerRegressionTest(this); - } - - static void thread_alloc(barrier_sys_regression_test* This) - { - size_t tid = This->threadID++; - for (size_t j=0; j<1000; j++) - { - This->barrier.wait(); - This->threadResults[tid] = tid; - This->barrier.wait(); - } - } - - bool run () - { - threadID.store(0); - numFailed.store(0); - - size_t numThreads = getNumberOfLogicalThreads(); - threadResults.resize(numThreads); - barrier.init(numThreads+1); - - /* create threads */ - std::vector<thread_t> threads; - for (size_t i=0; i<numThreads; i++) - threads.push_back(createThread((thread_func)thread_alloc,this)); - - /* run test */ - for (size_t i=0; i<1000; i++) - { - for (size_t i=0; i<numThreads; i++) threadResults[i] = 0; - barrier.wait(); - barrier.wait(); - for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i; - } - - /* destroy threads */ - for (size_t i=0; i<numThreads; i++) - join(threads[i]); - - return numFailed == 0; - } - }; - - barrier_sys_regression_test barrier_sys_regression_test; -} - - diff --git a/thirdparty/embree-aarch64/common/sys/barrier.h b/thirdparty/embree-aarch64/common/sys/barrier.h deleted file mode 100644 index 89607b8685..0000000000 --- a/thirdparty/embree-aarch64/common/sys/barrier.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "intrinsics.h" -#include "sysinfo.h" -#include "atomic.h" - -namespace embree -{ - /*! system barrier using operating system */ - class BarrierSys - { - public: - - /*! construction / destruction */ - BarrierSys (size_t N = 0); - ~BarrierSys (); - - private: - /*! class in non-copyable */ - BarrierSys (const BarrierSys& other) DELETED; // do not implement - BarrierSys& operator= (const BarrierSys& other) DELETED; // do not implement - - public: - /*! intializes the barrier with some number of threads */ - void init(size_t count); - - /*! lets calling thread wait in barrier */ - void wait(); - - private: - void* opaque; - }; - - /*! fast active barrier using atomitc counter */ - struct BarrierActive - { - public: - BarrierActive () - : cntr(0) {} - - void reset() { - cntr.store(0); - } - - void wait (size_t numThreads) - { - cntr++; - while (cntr.load() != numThreads) - pause_cpu(); - } - - private: - std::atomic<size_t> cntr; - }; - - /*! fast active barrier that does not require initialization to some number of threads */ - struct BarrierActiveAutoReset - { - public: - BarrierActiveAutoReset () - : cntr0(0), cntr1(0) {} - - void wait (size_t threadCount) - { - cntr0.fetch_add(1); - while (cntr0 != threadCount) pause_cpu(); - cntr1.fetch_add(1); - while (cntr1 != threadCount) pause_cpu(); - cntr0.fetch_add(-1); - while (cntr0 != 0) pause_cpu(); - cntr1.fetch_add(-1); - while (cntr1 != 0) pause_cpu(); - } - - private: - std::atomic<size_t> cntr0; - std::atomic<size_t> cntr1; - }; - - class LinearBarrierActive - { - public: - - /*! construction and destruction */ - LinearBarrierActive (size_t threadCount = 0); - ~LinearBarrierActive(); - - private: - /*! class in non-copyable */ - LinearBarrierActive (const LinearBarrierActive& other) DELETED; // do not implement - LinearBarrierActive& operator= (const LinearBarrierActive& other) DELETED; // do not implement - - public: - /*! intializes the barrier with some number of threads */ - void init(size_t threadCount); - - /*! thread with threadIndex waits in the barrier */ - void wait (const size_t threadIndex); - - private: - volatile unsigned char* count0; - volatile unsigned char* count1; - volatile unsigned int mode; - volatile unsigned int flag0; - volatile unsigned int flag1; - volatile size_t threadCount; - }; -} - diff --git a/thirdparty/embree-aarch64/common/sys/condition.cpp b/thirdparty/embree-aarch64/common/sys/condition.cpp deleted file mode 100644 index 0e7ca7af39..0000000000 --- a/thirdparty/embree-aarch64/common/sys/condition.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "condition.h" - -#if defined(__WIN32__) && !defined(PTHREADS_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -namespace embree -{ - struct ConditionImplementation - { - __forceinline ConditionImplementation () { - InitializeConditionVariable(&cond); - } - - __forceinline ~ConditionImplementation () { - } - - __forceinline void wait(MutexSys& mutex_in) { - SleepConditionVariableCS(&cond, (LPCRITICAL_SECTION)mutex_in.mutex, INFINITE); - } - - __forceinline void notify_all() { - WakeAllConditionVariable(&cond); - } - - public: - CONDITION_VARIABLE cond; - }; -} -#endif - -#if defined(__UNIX__) || defined(PTHREADS_WIN32) -#include <pthread.h> -namespace embree -{ - struct ConditionImplementation - { - __forceinline ConditionImplementation () { - pthread_cond_init(&cond,nullptr); - } - - __forceinline ~ConditionImplementation() { - pthread_cond_destroy(&cond); - } - - __forceinline void wait(MutexSys& mutex) { - pthread_cond_wait(&cond, (pthread_mutex_t*)mutex.mutex); - } - - __forceinline void notify_all() { - pthread_cond_broadcast(&cond); - } - - public: - pthread_cond_t cond; - }; -} -#endif - -namespace embree -{ - ConditionSys::ConditionSys () { - cond = new ConditionImplementation; - } - - ConditionSys::~ConditionSys() { - delete (ConditionImplementation*) cond; - } - - void ConditionSys::wait(MutexSys& mutex) { - ((ConditionImplementation*) cond)->wait(mutex); - } - - void ConditionSys::notify_all() { - ((ConditionImplementation*) cond)->notify_all(); - } -} diff --git a/thirdparty/embree-aarch64/common/sys/condition.h b/thirdparty/embree-aarch64/common/sys/condition.h deleted file mode 100644 index 7a3a05aa81..0000000000 --- a/thirdparty/embree-aarch64/common/sys/condition.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "mutex.h" - -namespace embree -{ - class ConditionSys - { - public: - ConditionSys(); - ~ConditionSys(); - void wait( class MutexSys& mutex ); - void notify_all(); - - template<typename Predicate> - __forceinline void wait( class MutexSys& mutex, const Predicate& pred ) - { - while (!pred()) wait(mutex); - } - - private: - ConditionSys (const ConditionSys& other) DELETED; // do not implement - ConditionSys& operator= (const ConditionSys& other) DELETED; // do not implement - - protected: - void* cond; - }; -} diff --git a/thirdparty/embree-aarch64/common/sys/filename.cpp b/thirdparty/embree-aarch64/common/sys/filename.cpp deleted file mode 100644 index 86182c1afb..0000000000 --- a/thirdparty/embree-aarch64/common/sys/filename.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "filename.h" -#include "sysinfo.h" - -namespace embree -{ -#ifdef __WIN32__ - const char path_sep = '\\'; -#else - const char path_sep = '/'; -#endif - - /*! create an empty filename */ - FileName::FileName () {} - - /*! create a valid filename from a string */ - FileName::FileName (const char* in) { - filename = in; - for (size_t i=0; i<filename.size(); i++) - if (filename[i] == '\\' || filename[i] == '/') - filename[i] = path_sep; - while (!filename.empty() && filename[filename.size()-1] == path_sep) - filename.resize(filename.size()-1); - } - - /*! create a valid filename from a string */ - FileName::FileName (const std::string& in) { - filename = in; - for (size_t i=0; i<filename.size(); i++) - if (filename[i] == '\\' || filename[i] == '/') - filename[i] = path_sep; - while (!filename.empty() && filename[filename.size()-1] == path_sep) - filename.resize(filename.size()-1); - } - - /*! returns path to home folder */ - FileName FileName::homeFolder() - { -#ifdef __WIN32__ - const char* home = getenv("UserProfile"); -#else - const char* home = getenv("HOME"); -#endif - if (home) return home; - return ""; - } - - /*! returns path to executable */ - FileName FileName::executableFolder() { - return FileName(getExecutableFileName()).path(); - } - - /*! returns the path */ - FileName FileName::path() const { - size_t pos = filename.find_last_of(path_sep); - if (pos == std::string::npos) return FileName(); - return filename.substr(0,pos); - } - - /*! returns the basename */ - std::string FileName::base() const { - size_t pos = filename.find_last_of(path_sep); - if (pos == std::string::npos) return filename; - return filename.substr(pos+1); - } - - /*! returns the extension */ - std::string FileName::ext() const { - size_t pos = filename.find_last_of('.'); - if (pos == std::string::npos) return ""; - return filename.substr(pos+1); - } - - /*! returns the extension */ - FileName FileName::dropExt() const { - size_t pos = filename.find_last_of('.'); - if (pos == std::string::npos) return filename; - return filename.substr(0,pos); - } - - /*! returns the basename without extension */ - std::string FileName::name() const { - size_t start = filename.find_last_of(path_sep); - if (start == std::string::npos) start = 0; else start++; - size_t end = filename.find_last_of('.'); - if (end == std::string::npos || end < start) end = filename.size(); - return filename.substr(start, end - start); - } - - /*! replaces the extension */ - FileName FileName::setExt(const std::string& ext) const { - size_t start = filename.find_last_of(path_sep); - if (start == std::string::npos) start = 0; else start++; - size_t end = filename.find_last_of('.'); - if (end == std::string::npos || end < start) return FileName(filename+ext); - return FileName(filename.substr(0,end)+ext); - } - - /*! adds the extension */ - FileName FileName::addExt(const std::string& ext) const { - return FileName(filename+ext); - } - - /*! concatenates two filenames to this/other */ - FileName FileName::operator +( const FileName& other ) const { - if (filename == "") return FileName(other); - else return FileName(filename + path_sep + other.filename); - } - - /*! concatenates two filenames to this/other */ - FileName FileName::operator +( const std::string& other ) const { - return operator+(FileName(other)); - } - - /*! removes the base from a filename (if possible) */ - FileName FileName::operator -( const FileName& base ) const { - size_t pos = filename.find_first_of(base); - if (pos == std::string::npos) return *this; - return FileName(filename.substr(pos+1)); - } - - /*! == operator */ - bool operator== (const FileName& a, const FileName& b) { - return a.filename == b.filename; - } - - /*! != operator */ - bool operator!= (const FileName& a, const FileName& b) { - return a.filename != b.filename; - } - - /*! output operator */ - std::ostream& operator<<(std::ostream& cout, const FileName& filename) { - return cout << filename.filename; - } -} diff --git a/thirdparty/embree-aarch64/common/sys/filename.h b/thirdparty/embree-aarch64/common/sys/filename.h deleted file mode 100644 index 58f881b14d..0000000000 --- a/thirdparty/embree-aarch64/common/sys/filename.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" - -namespace embree -{ - /*! Convenience class for handling file names and paths. */ - class FileName - { - public: - - /*! create an empty filename */ - FileName (); - - /*! create a valid filename from a string */ - FileName (const char* filename); - - /*! create a valid filename from a string */ - FileName (const std::string& filename); - - /*! returns path to home folder */ - static FileName homeFolder(); - - /*! returns path to executable */ - static FileName executableFolder(); - - /*! auto convert into a string */ - operator std::string() const { return filename; } - - /*! returns a string of the filename */ - const std::string str() const { return filename; } - - /*! returns a c-string of the filename */ - const char* c_str() const { return filename.c_str(); } - - /*! returns the path of a filename */ - FileName path() const; - - /*! returns the file of a filename */ - std::string base() const; - - /*! returns the base of a filename without extension */ - std::string name() const; - - /*! returns the file extension */ - std::string ext() const; - - /*! drops the file extension */ - FileName dropExt() const; - - /*! replaces the file extension */ - FileName setExt(const std::string& ext = "") const; - - /*! adds file extension */ - FileName addExt(const std::string& ext = "") const; - - /*! concatenates two filenames to this/other */ - FileName operator +( const FileName& other ) const; - - /*! concatenates two filenames to this/other */ - FileName operator +( const std::string& other ) const; - - /*! removes the base from a filename (if possible) */ - FileName operator -( const FileName& base ) const; - - /*! == operator */ - friend bool operator==(const FileName& a, const FileName& b); - - /*! != operator */ - friend bool operator!=(const FileName& a, const FileName& b); - - /*! output operator */ - friend embree_ostream operator<<(embree_ostream cout, const FileName& filename); - - private: - std::string filename; - }; -} diff --git a/thirdparty/embree-aarch64/common/sys/intrinsics.h b/thirdparty/embree-aarch64/common/sys/intrinsics.h deleted file mode 100644 index 44cdbd8f0f..0000000000 --- a/thirdparty/embree-aarch64/common/sys/intrinsics.h +++ /dev/null @@ -1,559 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" - -#if defined(__WIN32__) -#include <intrin.h> -#endif - -#if defined(__ARM_NEON) -#include "../math/SSE2NEON.h" -#if defined(NEON_AVX2_EMULATION) -#include "../math/AVX2NEON.h" -#endif -#else -#include <immintrin.h> -#endif - -#if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER) - #if !defined(_tzcnt_u32) - #define _tzcnt_u32 __tzcnt_u32 - #endif - #if !defined(_tzcnt_u64) - #define _tzcnt_u64 __tzcnt_u64 - #endif -#endif - -#if defined(__aarch64__) -#if !defined(_lzcnt_u32) - #define _lzcnt_u32 __builtin_clz -#endif -#if !defined(_lzcnt_u32) - #define _lzcnt_u32 __builtin_clzll -#endif -#else -#if defined(__LZCNT__) - #if !defined(_lzcnt_u32) - #define _lzcnt_u32 __lzcnt32 - #endif - #if !defined(_lzcnt_u64) - #define _lzcnt_u64 __lzcnt64 - #endif -#endif -#endif - -#if defined(__WIN32__) -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include <windows.h> -#endif - -/* normally defined in pmmintrin.h, but we always need this */ -#if !defined(_MM_SET_DENORMALS_ZERO_MODE) -#define _MM_DENORMALS_ZERO_ON (0x0040) -#define _MM_DENORMALS_ZERO_OFF (0x0000) -#define _MM_DENORMALS_ZERO_MASK (0x0040) -#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) -#endif - -namespace embree -{ - -//////////////////////////////////////////////////////////////////////////////// -/// Windows Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__WIN32__) - - __forceinline size_t read_tsc() - { - LARGE_INTEGER li; - QueryPerformanceCounter(&li); - return (size_t)li.QuadPart; - } - - __forceinline int bsf(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) - return _tzcnt_u32(v); -#else - unsigned long r = 0; _BitScanForward(&r,v); return r; -#endif - } - - __forceinline unsigned bsf(unsigned v) { -#if defined(__AVX2__) && !defined(__aarch64__) - return _tzcnt_u32(v); -#else - unsigned long r = 0; _BitScanForward(&r,v); return r; -#endif - } - -#if defined(__X86_64__) - __forceinline size_t bsf(size_t v) { -#if defined(__AVX2__) - return _tzcnt_u64(v); -#else - unsigned long r = 0; _BitScanForward64(&r,v); return r; -#endif - } -#endif - - __forceinline int bscf(int& v) - { - int i = bsf(v); - v &= v-1; - return i; - } - - __forceinline unsigned bscf(unsigned& v) - { - unsigned i = bsf(v); - v &= v-1; - return i; - } - -#if defined(__X86_64__) - __forceinline size_t bscf(size_t& v) - { - size_t i = bsf(v); - v &= v-1; - return i; - } -#endif - - __forceinline int bsr(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) - return 31 - _lzcnt_u32(v); -#else - unsigned long r = 0; _BitScanReverse(&r,v); return r; -#endif - } - - __forceinline unsigned bsr(unsigned v) { -#if defined(__AVX2__) && !defined(__aarch64__) - return 31 - _lzcnt_u32(v); -#else - unsigned long r = 0; _BitScanReverse(&r,v); return r; -#endif - } - -#if defined(__X86_64__) - __forceinline size_t bsr(size_t v) { -#if defined(__AVX2__) - return 63 -_lzcnt_u64(v); -#else - unsigned long r = 0; _BitScanReverse64(&r, v); return r; -#endif - } -#endif - - __forceinline int lzcnt(const int x) - { -#if defined(__AVX2__) && !defined(__aarch64__) - return _lzcnt_u32(x); -#else - if (unlikely(x == 0)) return 32; - return 31 - bsr(x); -#endif - } - - __forceinline int btc(int v, int i) { - long r = v; _bittestandcomplement(&r,i); return r; - } - - __forceinline int bts(int v, int i) { - long r = v; _bittestandset(&r,i); return r; - } - - __forceinline int btr(int v, int i) { - long r = v; _bittestandreset(&r,i); return r; - } - -#if defined(__X86_64__) - - __forceinline size_t btc(size_t v, size_t i) { - size_t r = v; _bittestandcomplement64((__int64*)&r,i); return r; - } - - __forceinline size_t bts(size_t v, size_t i) { - __int64 r = v; _bittestandset64(&r,i); return r; - } - - __forceinline size_t btr(size_t v, size_t i) { - __int64 r = v; _bittestandreset64(&r,i); return r; - } - -#endif - - __forceinline int32_t atomic_cmpxchg(volatile int32_t* p, const int32_t c, const int32_t v) { - return _InterlockedCompareExchange((volatile long*)p,v,c); - } - -//////////////////////////////////////////////////////////////////////////////// -/// Unix Platform -//////////////////////////////////////////////////////////////////////////////// - -#else - -#if defined(__i386__) && defined(__PIC__) - - __forceinline void __cpuid(int out[4], int op) - { - asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" - "cpuid\n\t" - "xchg{l}\t{%%}ebx, %1\n\t" - : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "0"(op)); - } - - __forceinline void __cpuid_count(int out[4], int op1, int op2) - { - asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" - "cpuid\n\t" - "xchg{l}\t{%%}ebx, %1\n\t" - : "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3]) - : "0" (op1), "2" (op2)); - } - -#else - - __forceinline void __cpuid(int out[4], int op) { -#if defined(__ARM_NEON) - if (op == 0) { // Get CPU name - out[0] = 0x41524d20; - out[1] = 0x41524d20; - out[2] = 0x41524d20; - out[3] = 0x41524d20; - } -#else - asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); -#endif - } - -#if !defined(__ARM_NEON) - __forceinline void __cpuid_count(int out[4], int op1, int op2) { - asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); - } -#endif - -#endif - - __forceinline uint64_t read_tsc() { -#if defined(__ARM_NEON) - return 0; // FIXME(LTE): mimic rdtsc -#else - uint32_t high,low; - asm volatile ("rdtsc" : "=d"(high), "=a"(low)); - return (((uint64_t)high) << 32) + (uint64_t)low; -#endif - } - - __forceinline int bsf(int v) { -#if defined(__ARM_NEON) - return __builtin_ctz(v); -#else -#if defined(__AVX2__) - return _tzcnt_u32(v); -#else - int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; -#endif -#endif - } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline unsigned bsf(unsigned v) - { -#if defined(__ARM_NEON) - return __builtin_ctz(v); -#else -#if defined(__AVX2__) - return _tzcnt_u32(v); -#else - unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; -#endif -#endif - } -#endif - - __forceinline size_t bsf(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) -#if defined(__X86_64__) - return _tzcnt_u64(v); -#else - return _tzcnt_u32(v); -#endif -#elif defined(__ARM_NEON) - return __builtin_ctzl(v); -#else - size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; -#endif - } - - __forceinline int bscf(int& v) - { - int i = bsf(v); - v &= v-1; - return i; - } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline unsigned int bscf(unsigned int& v) - { - unsigned int i = bsf(v); - v &= v-1; - return i; - } -#endif - - __forceinline size_t bscf(size_t& v) - { - size_t i = bsf(v); - v &= v-1; - return i; - } - - __forceinline int bsr(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) - return 31 - _lzcnt_u32(v); -#elif defined(__ARM_NEON) - return __builtin_clz(v)^31; -#else - int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; -#endif - } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline unsigned bsr(unsigned v) { -#if defined(__AVX2__) - return 31 - _lzcnt_u32(v); -#elif defined(__ARM_NEON) - return __builtin_clz(v)^31; -#else - unsigned r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; -#endif - } -#endif - - __forceinline size_t bsr(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) -#if defined(__X86_64__) - return 63 - _lzcnt_u64(v); -#else - return 31 - _lzcnt_u32(v); -#endif -#elif defined(__aarch64__) - return (sizeof(v) * 8 - 1) - __builtin_clzl(v); -#else - size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; -#endif - } - - __forceinline int lzcnt(const int x) - { -#if defined(__AVX2__) && !defined(__aarch64__) - return _lzcnt_u32(x); -#else - if (unlikely(x == 0)) return 32; - return 31 - bsr(x); -#endif - } - - __forceinline size_t blsr(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) -#if defined(__INTEL_COMPILER) - return _blsr_u64(v); -#else -#if defined(__X86_64__) - return __blsr_u64(v); -#else - return __blsr_u32(v); -#endif -#endif -#else - return v & (v-1); -#endif - } - - __forceinline int btc(int v, int i) { -#if defined(__aarch64__) - // _bittestandcomplement(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a ^ (1 << b); - // return x; - - // We only need `*a` - return (v ^ (1 << i)); -#else - int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; -#endif - } - - __forceinline int bts(int v, int i) { -#if defined(__aarch64__) - // _bittestandset(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a | (1 << b); - // return x; - return (v | (v << i)); -#else - int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; -#endif - } - - __forceinline int btr(int v, int i) { -#if defined(__aarch64__) - // _bittestandreset(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a & ~(1 << b); - // return x; - return (v & ~(v << i)); -#else - int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; -#endif - } - - __forceinline size_t btc(size_t v, size_t i) { -#if defined(__aarch64__) - return (v ^ (1 << i)); -#else - size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; -#endif - } - - __forceinline size_t bts(size_t v, size_t i) { -#if defined(__aarch64__) - return (v | (v << i)); -#else - size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; -#endif - } - - __forceinline size_t btr(size_t v, size_t i) { -#if defined(__ARM_NEON) - return (v & ~(v << i)); -#else - size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; -#endif - } - - __forceinline int32_t atomic_cmpxchg(int32_t volatile* value, int32_t comparand, const int32_t input) { - return __sync_val_compare_and_swap(value, comparand, input); - } - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// All Platforms -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__clang__) || defined(__GNUC__) -#if !defined(_mm_undefined_ps) - __forceinline __m128 _mm_undefined_ps() { return _mm_setzero_ps(); } -#endif -#if !defined(_mm_undefined_si128) - __forceinline __m128i _mm_undefined_si128() { return _mm_setzero_si128(); } -#endif -#if !defined(_mm256_undefined_ps) && defined(__AVX__) - __forceinline __m256 _mm256_undefined_ps() { return _mm256_setzero_ps(); } -#endif -#if !defined(_mm256_undefined_si256) && defined(__AVX__) - __forceinline __m256i _mm256_undefined_si256() { return _mm256_setzero_si256(); } -#endif -#if !defined(_mm512_undefined_ps) && defined(__AVX512F__) - __forceinline __m512 _mm512_undefined_ps() { return _mm512_setzero_ps(); } -#endif -#if !defined(_mm512_undefined_epi32) && defined(__AVX512F__) - __forceinline __m512i _mm512_undefined_epi32() { return _mm512_setzero_si512(); } -#endif -#endif - -#if defined(__SSE4_2__) || defined(__ARM_NEON) - - __forceinline int popcnt(int in) { - return _mm_popcnt_u32(in); - } - - __forceinline unsigned popcnt(unsigned in) { - return _mm_popcnt_u32(in); - } - -#if defined(__X86_64__) || defined(__ARM_NEON) - __forceinline size_t popcnt(size_t in) { - return _mm_popcnt_u64(in); - } -#endif - -#endif - - __forceinline uint64_t rdtsc() - { - int dummy[4]; - __cpuid(dummy,0); - uint64_t clock = read_tsc(); - __cpuid(dummy,0); - return clock; - } - - __forceinline void pause_cpu(const size_t N = 8) - { - for (size_t i=0; i<N; i++) - _mm_pause(); - } - - /* prefetches */ - __forceinline void prefetchL1 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T0); } - __forceinline void prefetchL2 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T1); } - __forceinline void prefetchL3 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T2); } - __forceinline void prefetchNTA(const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_NTA); } - __forceinline void prefetchEX (const void* ptr) { -#if defined(__INTEL_COMPILER) - _mm_prefetch((const char*)ptr,_MM_HINT_ET0); -#else - _mm_prefetch((const char*)ptr,_MM_HINT_T0); -#endif - } - - __forceinline void prefetchL1EX(const void* ptr) { - prefetchEX(ptr); - } - - __forceinline void prefetchL2EX(const void* ptr) { - prefetchEX(ptr); - } -#if defined(__AVX2__) && !defined(__aarch64__) - __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); } - __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); } -#if defined(__X86_64__) - __forceinline size_t pext(size_t a, size_t b) { return _pext_u64(a, b); } - __forceinline size_t pdep(size_t a, size_t b) { return _pdep_u64(a, b); } -#endif -#endif - -#if defined(__AVX512F__) -#if defined(__INTEL_COMPILER) - __forceinline float mm512_cvtss_f32(__m512 v) { - return _mm512_cvtss_f32(v); - } - __forceinline int mm512_mask2int(__mmask16 k1) { - return _mm512_mask2int(k1); - } - __forceinline __mmask16 mm512_int2mask(int mask) { - return _mm512_int2mask(mask); - } -#else - __forceinline float mm512_cvtss_f32(__m512 v) { // FIXME: _mm512_cvtss_f32 neither supported by clang v4.0.0 nor GCC 6.3 - return _mm_cvtss_f32(_mm512_castps512_ps128(v)); - } - __forceinline int mm512_mask2int(__mmask16 k1) { // FIXME: _mm512_mask2int not yet supported by GCC 6.3 - return (int)k1; - } - __forceinline __mmask16 mm512_int2mask(int mask) { // FIXME: _mm512_int2mask not yet supported by GCC 6.3 - return (__mmask16)mask; - } -#endif -#endif -} diff --git a/thirdparty/embree-aarch64/common/sys/library.cpp b/thirdparty/embree-aarch64/common/sys/library.cpp deleted file mode 100644 index 899267a1e4..0000000000 --- a/thirdparty/embree-aarch64/common/sys/library.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "library.h" -#include "sysinfo.h" -#include "filename.h" - -//////////////////////////////////////////////////////////////////////////////// -/// Windows Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__WIN32__) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -namespace embree -{ - /* opens a shared library */ - lib_t openLibrary(const std::string& file) - { - std::string fullName = file+".dll"; - FileName executable = getExecutableFileName(); - HANDLE handle = LoadLibrary((executable.path() + fullName).c_str()); - return lib_t(handle); - } - - /* returns address of a symbol from the library */ - void* getSymbol(lib_t lib, const std::string& sym) { - return reinterpret_cast<void *>(GetProcAddress(HMODULE(lib),sym.c_str())); - } - - /* closes the shared library */ - void closeLibrary(lib_t lib) { - FreeLibrary(HMODULE(lib)); - } -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Unix Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__UNIX__) - -#include <dlfcn.h> - -namespace embree -{ - /* opens a shared library */ - lib_t openLibrary(const std::string& file) - { -#if defined(__MACOSX__) - std::string fullName = "lib"+file+".dylib"; -#else - std::string fullName = "lib"+file+".so"; -#endif - void* lib = dlopen(fullName.c_str(), RTLD_NOW); - if (lib) return lib_t(lib); - FileName executable = getExecutableFileName(); - lib = dlopen((executable.path() + fullName).c_str(),RTLD_NOW); - if (lib == nullptr) { - const char* error = dlerror(); - if (error) { - THROW_RUNTIME_ERROR(error); - } else { - THROW_RUNTIME_ERROR("could not load library "+executable.str()); - } - } - return lib_t(lib); - } - - /* returns address of a symbol from the library */ - void* getSymbol(lib_t lib, const std::string& sym) { - return dlsym(lib,sym.c_str()); - } - - /* closes the shared library */ - void closeLibrary(lib_t lib) { - dlclose(lib); - } -} -#endif diff --git a/thirdparty/embree-aarch64/common/sys/library.h b/thirdparty/embree-aarch64/common/sys/library.h deleted file mode 100644 index c2164e9fbe..0000000000 --- a/thirdparty/embree-aarch64/common/sys/library.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" - -namespace embree -{ - /*! type for shared library */ - typedef struct opaque_lib_t* lib_t; - - /*! loads a shared library */ - lib_t openLibrary(const std::string& file); - - /*! returns address of a symbol from the library */ - void* getSymbol(lib_t lib, const std::string& sym); - - /*! unloads a shared library */ - void closeLibrary(lib_t lib); -} diff --git a/thirdparty/embree-aarch64/common/sys/mutex.cpp b/thirdparty/embree-aarch64/common/sys/mutex.cpp deleted file mode 100644 index 11779bc9b9..0000000000 --- a/thirdparty/embree-aarch64/common/sys/mutex.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "mutex.h" -#include "regression.h" - -#if defined(__WIN32__) && !defined(PTHREADS_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -namespace embree -{ - MutexSys::MutexSys() { mutex = new CRITICAL_SECTION; InitializeCriticalSection((CRITICAL_SECTION*)mutex); } - MutexSys::~MutexSys() { DeleteCriticalSection((CRITICAL_SECTION*)mutex); delete (CRITICAL_SECTION*)mutex; } - void MutexSys::lock() { EnterCriticalSection((CRITICAL_SECTION*)mutex); } - bool MutexSys::try_lock() { return TryEnterCriticalSection((CRITICAL_SECTION*)mutex) != 0; } - void MutexSys::unlock() { LeaveCriticalSection((CRITICAL_SECTION*)mutex); } -} -#endif - -#if defined(__UNIX__) || defined(PTHREADS_WIN32) -#include <pthread.h> -namespace embree -{ - /*! system mutex using pthreads */ - MutexSys::MutexSys() - { - mutex = new pthread_mutex_t; - if (pthread_mutex_init((pthread_mutex_t*)mutex, nullptr) != 0) - THROW_RUNTIME_ERROR("pthread_mutex_init failed"); - } - - MutexSys::~MutexSys() - { - MAYBE_UNUSED bool ok = pthread_mutex_destroy((pthread_mutex_t*)mutex) == 0; - assert(ok); - delete (pthread_mutex_t*)mutex; - mutex = nullptr; - } - - void MutexSys::lock() - { - if (pthread_mutex_lock((pthread_mutex_t*)mutex) != 0) - THROW_RUNTIME_ERROR("pthread_mutex_lock failed"); - } - - bool MutexSys::try_lock() { - return pthread_mutex_trylock((pthread_mutex_t*)mutex) == 0; - } - - void MutexSys::unlock() - { - if (pthread_mutex_unlock((pthread_mutex_t*)mutex) != 0) - THROW_RUNTIME_ERROR("pthread_mutex_unlock failed"); - } -}; -#endif diff --git a/thirdparty/embree-aarch64/common/sys/mutex.h b/thirdparty/embree-aarch64/common/sys/mutex.h deleted file mode 100644 index 1164210f23..0000000000 --- a/thirdparty/embree-aarch64/common/sys/mutex.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" -#include "intrinsics.h" -#include "atomic.h" - -namespace embree -{ - /*! system mutex */ - class MutexSys { - friend struct ConditionImplementation; - public: - MutexSys(); - ~MutexSys(); - - private: - MutexSys (const MutexSys& other) DELETED; // do not implement - MutexSys& operator= (const MutexSys& other) DELETED; // do not implement - - public: - void lock(); - bool try_lock(); - void unlock(); - - protected: - void* mutex; - }; - - /*! spinning mutex */ - class SpinLock - { - public: - - SpinLock () - : flag(false) {} - - __forceinline bool isLocked() { - return flag.load(); - } - - __forceinline void lock() - { - while (true) - { - while (flag.load()) - { - _mm_pause(); - _mm_pause(); - } - - bool expected = false; - if (flag.compare_exchange_strong(expected,true,std::memory_order_acquire)) - break; - } - } - - __forceinline bool try_lock() - { - bool expected = false; - if (flag.load() != expected) { - return false; - } - return flag.compare_exchange_strong(expected,true,std::memory_order_acquire); - } - - __forceinline void unlock() { - flag.store(false,std::memory_order_release); - } - - __forceinline void wait_until_unlocked() - { - while(flag.load()) - { - _mm_pause(); - _mm_pause(); - } - } - - public: - atomic<bool> flag; - }; - - /*! safe mutex lock and unlock helper */ - template<typename Mutex> class Lock { - public: - Lock (Mutex& mutex) : mutex(mutex), locked(true) { mutex.lock(); } - Lock (Mutex& mutex, bool locked) : mutex(mutex), locked(locked) {} - ~Lock() { if (locked) mutex.unlock(); } - __forceinline void lock() { assert(!locked); locked = true; mutex.lock(); } - __forceinline bool isLocked() const { return locked; } - protected: - Mutex& mutex; - bool locked; - }; -} diff --git a/thirdparty/embree-aarch64/common/sys/platform.h b/thirdparty/embree-aarch64/common/sys/platform.h deleted file mode 100644 index 737f14aa6e..0000000000 --- a/thirdparty/embree-aarch64/common/sys/platform.h +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#define _CRT_SECURE_NO_WARNINGS - -#include <cstddef> -#include <cassert> -#include <cstdlib> -#include <cstdio> -#include <memory> -#include <stdexcept> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <string> -#include <cstring> -#include <stdint.h> -#include <functional> - -//////////////////////////////////////////////////////////////////////////////// -/// detect platform -//////////////////////////////////////////////////////////////////////////////// - -/* detect 32 or 64 platform */ -#if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) -#define __X86_64__ -#endif - -/* detect Linux platform */ -#if defined(linux) || defined(__linux__) || defined(__LINUX__) -# if !defined(__LINUX__) -# define __LINUX__ -# endif -# if !defined(__UNIX__) -# define __UNIX__ -# endif -#endif - -/* detect FreeBSD platform */ -#if defined(__FreeBSD__) || defined(__FREEBSD__) -# if !defined(__FREEBSD__) -# define __FREEBSD__ -# endif -# if !defined(__UNIX__) -# define __UNIX__ -# endif -#endif - -/* detect Windows 95/98/NT/2000/XP/Vista/7/8/10 platform */ -#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__) -# if !defined(__WIN32__) -# define __WIN32__ -# endif -#endif - -/* detect Cygwin platform */ -#if defined(__CYGWIN__) -# if !defined(__UNIX__) -# define __UNIX__ -# endif -#endif - -/* detect MAC OS X platform */ -#if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__) -# if !defined(__MACOSX__) -# define __MACOSX__ -# endif -# if !defined(__UNIX__) -# define __UNIX__ -# endif -#endif - -/* try to detect other Unix systems */ -#if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix) -# if !defined(__UNIX__) -# define __UNIX__ -# endif -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Macros -//////////////////////////////////////////////////////////////////////////////// - -#ifdef __WIN32__ -#define dll_export __declspec(dllexport) -#define dll_import __declspec(dllimport) -#else -#define dll_export __attribute__ ((visibility ("default"))) -#define dll_import -#endif - -#ifdef __WIN32__ -#if !defined(__noinline) -#define __noinline __declspec(noinline) -#endif -//#define __forceinline __forceinline -//#define __restrict __restrict -#if defined(__INTEL_COMPILER) -#define __restrict__ __restrict -#else -#define __restrict__ //__restrict // causes issues with MSVC -#endif -#if !defined(__thread) -// NOTE: Require `-fms-extensions` for clang -#define __thread __declspec(thread) -#endif -#if !defined(__aligned) -#if defined(__MINGW32__) -#define __aligned(...) __attribute__((aligned(__VA_ARGS__))) -#else -#define __aligned(...) __declspec(align(__VA_ARGS__)) -#endif -#endif -//#define __FUNCTION__ __FUNCTION__ -#define debugbreak() __debugbreak() - -#else -#if !defined(__noinline) -#define __noinline __attribute__((noinline)) -#endif -#if !defined(__forceinline) -#define __forceinline inline __attribute__((always_inline)) -#endif -//#define __restrict __restrict -//#define __thread __thread -#if !defined(__aligned) -#define __aligned(...) __attribute__((aligned(__VA_ARGS__))) -#endif -#if !defined(__FUNCTION__) -#define __FUNCTION__ __PRETTY_FUNCTION__ -#endif -#define debugbreak() asm ("int $3") -#endif - -#if defined(__clang__) || defined(__GNUC__) - #define MAYBE_UNUSED __attribute__((unused)) -#else - #define MAYBE_UNUSED -#endif - -#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly - #define DELETED -#else - #define DELETED = delete -#endif - -// -- GODOT start -- -#ifndef likely -// -- GODOT end -- -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#define likely(expr) (expr) -#define unlikely(expr) (expr) -#else -#define likely(expr) __builtin_expect((bool)(expr),true ) -#define unlikely(expr) __builtin_expect((bool)(expr),false) -#endif -// -- GODOT start -- -#endif -// -- GODOT end -- - -//////////////////////////////////////////////////////////////////////////////// -/// Error handling and debugging -//////////////////////////////////////////////////////////////////////////////// - -/* debug printing macros */ -#define STRING(x) #x -#define TOSTRING(x) STRING(x) -#define PING embree_cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl -#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl -#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl -#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl -#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl - -#if defined(DEBUG) // only report file and line in debug mode - // -- GODOT start -- - // #define THROW_RUNTIME_ERROR(str) - // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); - #define THROW_RUNTIME_ERROR(str) \ - printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); - // -- GODOT end -- -#else - // -- GODOT start -- - // #define THROW_RUNTIME_ERROR(str) - // throw std::runtime_error(str); - #define THROW_RUNTIME_ERROR(str) \ - abort(); - // -- GODOT end -- -#endif - -#define FATAL(x) THROW_RUNTIME_ERROR(x) -#define WARNING(x) { std::cerr << "Warning: " << x << embree_endl << std::flush; } - -#define NOT_IMPLEMENTED FATAL(std::string(__FUNCTION__) + " not implemented") - -//////////////////////////////////////////////////////////////////////////////// -/// Basic types -//////////////////////////////////////////////////////////////////////////////// - -/* default floating-point type */ -namespace embree { - typedef float real; -} - -/* windows does not have ssize_t */ -#if defined(__WIN32__) -#if defined(__X86_64__) || defined(__aarch64__) -typedef int64_t ssize_t; -#else -typedef int32_t ssize_t; -#endif -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Basic utility functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline std::string toString(long long value) { - return std::to_string(value); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Disable some compiler warnings -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__INTEL_COMPILER) -//#pragma warning(disable:265 ) // floating-point operation result is out of range -//#pragma warning(disable:383 ) // value copied to temporary, reference to temporary used -//#pragma warning(disable:869 ) // parameter was never referenced -//#pragma warning(disable:981 ) // operands are evaluated in unspecified order -//#pragma warning(disable:1418) // external function definition with no prior declaration -//#pragma warning(disable:1419) // external declaration in primary source file -//#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable -//#pragma warning(disable:94 ) // the size of an array must be greater than zero -//#pragma warning(disable:1599) // declaration hides parameter -//#pragma warning(disable:424 ) // extra ";" ignored -#pragma warning(disable:2196) // routine is both "inline" and "noinline" -//#pragma warning(disable:177 ) // label was declared but never referenced -//#pragma warning(disable:114 ) // function was referenced but not defined -//#pragma warning(disable:819 ) // template nesting depth does not match the previous declaration of function -#pragma warning(disable:15335) // was not vectorized: vectorization possible but seems inefficient -#endif - -#if defined(_MSC_VER) -//#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union -#pragma warning(disable:4800) // forcing value to bool 'true' or 'false' (performance warning) -//#pragma warning(disable:4267) // '=' : conversion from 'size_t' to 'unsigned long', possible loss of data -#pragma warning(disable:4244) // 'argument' : conversion from 'ssize_t' to 'unsigned int', possible loss of data -//#pragma warning(disable:4355) // 'this' : used in base member initializer list -//#pragma warning(disable:391 ) // '<=' : signed / unsigned mismatch -//#pragma warning(disable:4018) // '<' : signed / unsigned mismatch -//#pragma warning(disable:4305) // 'initializing' : truncation from 'double' to 'float' -//#pragma warning(disable:4068) // unknown pragma -//#pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned -//#pragma warning(disable:4838) // conversion from 'unsigned int' to 'const int' requires a narrowing conversion) -//#pragma warning(disable:4227) // anachronism used : qualifiers on reference are ignored -#pragma warning(disable:4503) // decorated name length exceeded, name was truncated -#pragma warning(disable:4180) // qualifier applied to function type has no meaning; ignored -#pragma warning(disable:4258) // definition from the for loop is ignored; the definition from the enclosing scope is used - -# if _MSC_VER < 1910 // prior to Visual studio 2017 (V141) -# pragma warning(disable:4101) // warning C4101: 'x': unreferenced local variable // a compiler bug issues wrong warnings -# pragma warning(disable:4789) // buffer '' of size 8 bytes will be overrun; 32 bytes will be written starting at offset 0 -# endif - -#endif - -#if defined(__clang__) && !defined(__INTEL_COMPILER) -//#pragma clang diagnostic ignored "-Wunknown-pragmas" -//#pragma clang diagnostic ignored "-Wunused-variable" -//#pragma clang diagnostic ignored "-Wreorder" -//#pragma clang diagnostic ignored "-Wmicrosoft" -//#pragma clang diagnostic ignored "-Wunused-private-field" -//#pragma clang diagnostic ignored "-Wunused-local-typedef" -//#pragma clang diagnostic ignored "-Wunused-function" -//#pragma clang diagnostic ignored "-Wnarrowing" -//#pragma clang diagnostic ignored "-Wc++11-narrowing" -//#pragma clang diagnostic ignored "-Wdeprecated-register" -//#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) -#pragma GCC diagnostic ignored "-Wpragmas" -//#pragma GCC diagnostic ignored "-Wnarrowing" -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -//#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -//#pragma GCC diagnostic ignored "-Warray-bounds" -#pragma GCC diagnostic ignored "-Wattributes" -#pragma GCC diagnostic ignored "-Wmisleading-indentation" -#pragma GCC diagnostic ignored "-Wsign-compare" -#pragma GCC diagnostic ignored "-Wparentheses" -#endif - -#if defined(__clang__) && defined(__WIN32__) -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wmicrosoft-cast" -#pragma clang diagnostic ignored "-Wmicrosoft-enum-value" -#pragma clang diagnostic ignored "-Wmicrosoft-include" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wunknown-pragmas" -#endif - -/* disabling deprecated warning, please use only where use of deprecated Embree API functions is desired */ -#if defined(__WIN32__) && defined(__INTEL_COMPILER) -#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 1478)) // warning: function was declared deprecated -#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable: 1478)) // warning: function was declared deprecated -#elif defined(__INTEL_COMPILER) -#define DISABLE_DEPRECATED_WARNING _Pragma("warning (disable: 1478)") // warning: function was declared deprecated -#define ENABLE_DEPRECATED_WARNING _Pragma("warning (enable : 1478)") // warning: function was declared deprecated -#elif defined(__clang__) -#define DISABLE_DEPRECATED_WARNING _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated -#define ENABLE_DEPRECATED_WARNING _Pragma("clang diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated -#elif defined(__GNUC__) -#define DISABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated -#define ENABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated -#elif defined(_MSC_VER) -#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated -#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated -#endif - -/* embree output stream */ -#define embree_ostream std::ostream& -#define embree_cout std::cout -#define embree_cout_uniform std::cout -#define embree_endl std::endl - -//////////////////////////////////////////////////////////////////////////////// -/// Some macros for static profiling -//////////////////////////////////////////////////////////////////////////////// - -#if defined (__GNUC__) -#define IACA_SSC_MARK( MARK_ID ) \ -__asm__ __volatile__ ( \ - "\n\t movl $"#MARK_ID", %%ebx" \ - "\n\t .byte 0x64, 0x67, 0x90" \ - : : : "memory" ); - -#define IACA_UD_BYTES __asm__ __volatile__ ("\n\t .byte 0x0F, 0x0B"); - -#else -#define IACA_UD_BYTES {__asm _emit 0x0F \ - __asm _emit 0x0B} - -#define IACA_SSC_MARK(x) {__asm mov ebx, x\ - __asm _emit 0x64 \ - __asm _emit 0x67 \ - __asm _emit 0x90 } - -#define IACA_VC64_START __writegsbyte(111, 111); -#define IACA_VC64_END __writegsbyte(222, 222); - -#endif - -#define IACA_START {IACA_UD_BYTES \ - IACA_SSC_MARK(111)} -#define IACA_END {IACA_SSC_MARK(222) \ - IACA_UD_BYTES} - -namespace embree -{ - template<typename Closure> - struct OnScopeExitHelper - { - OnScopeExitHelper (const Closure f) : active(true), f(f) {} - ~OnScopeExitHelper() { if (active) f(); } - void deactivate() { active = false; } - bool active; - const Closure f; - }; - - template <typename Closure> - OnScopeExitHelper<Closure> OnScopeExit(const Closure f) { - return OnScopeExitHelper<Closure>(f); - } - -#define STRING_JOIN2(arg1, arg2) DO_STRING_JOIN2(arg1, arg2) -#define DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2 -#define ON_SCOPE_EXIT(code) \ - auto STRING_JOIN2(on_scope_exit_, __LINE__) = OnScopeExit([&](){code;}) - - template<typename Ty> - std::unique_ptr<Ty> make_unique(Ty* ptr) { - return std::unique_ptr<Ty>(ptr); - } - -} diff --git a/thirdparty/embree-aarch64/common/sys/ref.h b/thirdparty/embree-aarch64/common/sys/ref.h deleted file mode 100644 index 24648e6234..0000000000 --- a/thirdparty/embree-aarch64/common/sys/ref.h +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "atomic.h" - -namespace embree -{ - struct NullTy { - }; - - extern MAYBE_UNUSED NullTy null; - - class RefCount - { - public: - RefCount(int val = 0) : refCounter(val) {} - virtual ~RefCount() {}; - - virtual RefCount* refInc() { refCounter.fetch_add(1); return this; } - virtual void refDec() { if (refCounter.fetch_add(-1) == 1) delete this; } - private: - std::atomic<size_t> refCounter; - }; - - //////////////////////////////////////////////////////////////////////////////// - /// Reference to single object - //////////////////////////////////////////////////////////////////////////////// - - template<typename Type> - class Ref - { - public: - Type* ptr; - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline Ref() : ptr(nullptr) {} - __forceinline Ref(NullTy) : ptr(nullptr) {} - __forceinline Ref(const Ref& input) : ptr(input.ptr) { if (ptr) ptr->refInc(); } - __forceinline Ref(Ref&& input) : ptr(input.ptr) { input.ptr = nullptr; } - - __forceinline Ref(Type* const input) : ptr(input) - { - if (ptr) - ptr->refInc(); - } - - __forceinline ~Ref() - { - if (ptr) - ptr->refDec(); - } - - __forceinline Ref& operator =(const Ref& input) - { - if (input.ptr) - input.ptr->refInc(); - if (ptr) - ptr->refDec(); - ptr = input.ptr; - return *this; - } - - __forceinline Ref& operator =(Ref&& input) - { - if (ptr) - ptr->refDec(); - ptr = input.ptr; - input.ptr = nullptr; - return *this; - } - - __forceinline Ref& operator =(Type* const input) - { - if (input) - input->refInc(); - if (ptr) - ptr->refDec(); - ptr = input; - return *this; - } - - __forceinline Ref& operator =(NullTy) - { - if (ptr) - ptr->refDec(); - ptr = nullptr; - return *this; - } - - __forceinline operator bool() const { return ptr != nullptr; } - - __forceinline const Type& operator *() const { return *ptr; } - __forceinline Type& operator *() { return *ptr; } - __forceinline const Type* operator ->() const { return ptr; } - __forceinline Type* operator ->() { return ptr; } - - template<typename TypeOut> - __forceinline Ref<TypeOut> cast() { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); } - template<typename TypeOut> - __forceinline const Ref<TypeOut> cast() const { return Ref<TypeOut>(static_cast<TypeOut*>(ptr)); } - - template<typename TypeOut> - __forceinline Ref<TypeOut> dynamicCast() { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); } - template<typename TypeOut> - __forceinline const Ref<TypeOut> dynamicCast() const { return Ref<TypeOut>(dynamic_cast<TypeOut*>(ptr)); } - }; - - template<typename Type> __forceinline bool operator < (const Ref<Type>& a, const Ref<Type>& b) { return a.ptr < b.ptr; } - - template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, NullTy ) { return a.ptr == nullptr; } - template<typename Type> __forceinline bool operator ==(NullTy , const Ref<Type>& b) { return nullptr == b.ptr; } - template<typename Type> __forceinline bool operator ==(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr == b.ptr; } - - template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, NullTy ) { return a.ptr != nullptr; } - template<typename Type> __forceinline bool operator !=(NullTy , const Ref<Type>& b) { return nullptr != b.ptr; } - template<typename Type> __forceinline bool operator !=(const Ref<Type>& a, const Ref<Type>& b) { return a.ptr != b.ptr; } -} diff --git a/thirdparty/embree-aarch64/common/sys/regression.cpp b/thirdparty/embree-aarch64/common/sys/regression.cpp deleted file mode 100644 index d95ff8dfe0..0000000000 --- a/thirdparty/embree-aarch64/common/sys/regression.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "regression.h" - -namespace embree -{ - /* registerRegressionTest is invoked from static initializers, thus - * we cannot have the regression_tests variable as global static - * variable due to issues with static variable initialization - * order. */ - std::vector<RegressionTest*>& get_regression_tests() - { - static std::vector<RegressionTest*> regression_tests; - return regression_tests; - } - - void registerRegressionTest(RegressionTest* test) - { - get_regression_tests().push_back(test); - } - - RegressionTest* getRegressionTest(size_t index) - { - if (index >= get_regression_tests().size()) - return nullptr; - - return get_regression_tests()[index]; - } -} diff --git a/thirdparty/embree-aarch64/common/sys/regression.h b/thirdparty/embree-aarch64/common/sys/regression.h deleted file mode 100644 index 632f8d92cf..0000000000 --- a/thirdparty/embree-aarch64/common/sys/regression.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" - -#include <vector> - -namespace embree -{ - /*! virtual interface for all regression tests */ - struct RegressionTest - { - RegressionTest (std::string name) : name(name) {} - virtual bool run() = 0; - std::string name; - }; - - /*! registers a regression test */ - void registerRegressionTest(RegressionTest* test); - - /*! run all regression tests */ - RegressionTest* getRegressionTest(size_t index); -} diff --git a/thirdparty/embree-aarch64/common/sys/string.cpp b/thirdparty/embree-aarch64/common/sys/string.cpp deleted file mode 100644 index 931244383e..0000000000 --- a/thirdparty/embree-aarch64/common/sys/string.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "string.h" - -#include <algorithm> -#include <ctype.h> - -namespace embree -{ - char to_lower(char c) { return char(tolower(int(c))); } - char to_upper(char c) { return char(toupper(int(c))); } - std::string toLowerCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_lower); return dst; } - std::string toUpperCase(const std::string& s) { std::string dst(s); std::transform(dst.begin(), dst.end(), dst.begin(), to_upper); return dst; } - - Vec2f string_to_Vec2f ( std::string str ) - { - size_t next = 0; - const float x = std::stof(str,&next); str = str.substr(next+1); - const float y = std::stof(str,&next); - return Vec2f(x,y); - } - - Vec3f string_to_Vec3f ( std::string str ) - { - size_t next = 0; - const float x = std::stof(str,&next); str = str.substr(next+1); - const float y = std::stof(str,&next); str = str.substr(next+1); - const float z = std::stof(str,&next); - return Vec3f(x,y,z); - } - - Vec4f string_to_Vec4f ( std::string str ) - { - size_t next = 0; - const float x = std::stof(str,&next); str = str.substr(next+1); - const float y = std::stof(str,&next); str = str.substr(next+1); - const float z = std::stof(str,&next); str = str.substr(next+1); - const float w = std::stof(str,&next); - return Vec4f(x,y,z,w); - } -} diff --git a/thirdparty/embree-aarch64/common/sys/string.h b/thirdparty/embree-aarch64/common/sys/string.h deleted file mode 100644 index 2e9b0f88c3..0000000000 --- a/thirdparty/embree-aarch64/common/sys/string.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" -#include "../math/vec2.h" -#include "../math/vec3.h" -#include "../math/vec4.h" - -namespace embree -{ - class IOStreamStateRestorer - { - public: - IOStreamStateRestorer(std::ostream& iostream) - : iostream(iostream), flags(iostream.flags()), precision(iostream.precision()) { - } - - ~IOStreamStateRestorer() { - iostream.flags(flags); - iostream.precision(precision); - } - - private: - std::ostream& iostream; - std::ios::fmtflags flags; - std::streamsize precision; - }; - - std::string toLowerCase(const std::string& s); - std::string toUpperCase(const std::string& s); - - Vec2f string_to_Vec2f ( std::string str ); - Vec3f string_to_Vec3f ( std::string str ); - Vec4f string_to_Vec4f ( std::string str ); -} diff --git a/thirdparty/embree-aarch64/common/sys/sysinfo.cpp b/thirdparty/embree-aarch64/common/sys/sysinfo.cpp deleted file mode 100644 index 1d11436770..0000000000 --- a/thirdparty/embree-aarch64/common/sys/sysinfo.cpp +++ /dev/null @@ -1,676 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "sysinfo.h" -#include "intrinsics.h" -#include "string.h" -#include "ref.h" -#if defined(__FREEBSD__) -#include <sys/cpuset.h> -#include <pthread_np.h> -typedef cpuset_t cpu_set_t; -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// All Platforms -//////////////////////////////////////////////////////////////////////////////// - -namespace embree -{ - NullTy null; - - std::string getPlatformName() - { -#if defined(__LINUX__) && defined(__ANDROID__) && defined(__aarch64__) && defined(__ARM_NEON) - return "Android Linux (aarch64 / arm64)"; -#elif defined(__LINUX__) && defined(__ANDROID__) && defined(__X86_64__) - return "Android Linux (x64)"; -#elif defined(__LINUX__) && defined(__ANDROID__) && (defined(_X86_) || defined(__X86__) || defined(_M_IX86)) - return "Android Linux (x86)"; -#elif defined(__LINUX__) && !defined(__X86_64__) - return "Linux (32bit)"; -#elif defined(__LINUX__) && defined(__X86_64__) - return "Linux (64bit)"; -#elif defined(__FREEBSD__) && !defined(__X86_64__) - return "FreeBSD (32bit)"; -#elif defined(__FREEBSD__) && defined(__X86_64__) - return "FreeBSD (64bit)"; -#elif defined(__CYGWIN__) && !defined(__X86_64__) - return "Cygwin (32bit)"; -#elif defined(__CYGWIN__) && defined(__X86_64__) - return "Cygwin (64bit)"; -#elif defined(__WIN32__) && !defined(__X86_64__) - return "Windows (32bit)"; -#elif defined(__WIN32__) && defined(__X86_64__) - return "Windows (64bit)"; -#elif defined(TARGET_IPHONE_SIMULATOR) && defined(__X86_64__) - return "iOS Simulator (x64)"; -#elif defined(TARGET_OS_IPHONE) && defined(__aarch64__) && defined(__ARM_NEON) - return "iOS (aarch64 / arm64)"; -#elif defined(__MACOSX__) && !defined(__X86_64__) - return "Mac OS X (32bit)"; -#elif defined(__MACOSX__) && defined(__X86_64__) - return "Mac OS X (64bit)"; -#elif defined(__UNIX__) && defined(__aarch64__) - return "Unix (aarch64)"; -#elif defined(__UNIX__) && !defined(__X86_64__) - return "Unix (32bit)"; -#elif defined(__UNIX__) && defined(__X86_64__) - return "Unix (64bit)"; -#else - return "Unknown"; -#endif - } - - std::string getCompilerName() - { -#if defined(__INTEL_COMPILER) - int icc_mayor = __INTEL_COMPILER / 100 % 100; - int icc_minor = __INTEL_COMPILER % 100; - std::string version = "Intel Compiler "; - version += toString(icc_mayor); - version += "." + toString(icc_minor); -#if defined(__INTEL_COMPILER_UPDATE) - version += "." + toString(__INTEL_COMPILER_UPDATE); -#endif - return version; -#elif defined(__clang__) - return "CLANG " __clang_version__; -#elif defined (__GNUC__) - return "GCC " __VERSION__; -#elif defined(_MSC_VER) - std::string version = toString(_MSC_FULL_VER); - version.insert(4,"."); - version.insert(9,"."); - version.insert(2,"."); - return "Visual C++ Compiler " + version; -#else - return "Unknown Compiler"; -#endif - } - - std::string getCPUVendor() - { - int cpuinfo[4]; - __cpuid (cpuinfo, 0); - int name[4]; - name[0] = cpuinfo[1]; - name[1] = cpuinfo[3]; - name[2] = cpuinfo[2]; - name[3] = 0; - return (char*)name; - } - - CPU getCPUModel() - { - if (getCPUVendor() != "GenuineIntel") - return CPU::UNKNOWN; - - int out[4]; - __cpuid(out, 0); - if (out[0] < 1) return CPU::UNKNOWN; - __cpuid(out, 1); - - /* please see CPUID documentation for these formulas */ - uint32_t family_ID = (out[0] >> 8) & 0x0F; - uint32_t extended_family_ID = (out[0] >> 20) & 0xFF; - - uint32_t model_ID = (out[0] >> 4) & 0x0F; - uint32_t extended_model_ID = (out[0] >> 16) & 0x0F; - - uint32_t DisplayFamily = family_ID; - if (family_ID == 0x0F) - DisplayFamily += extended_family_ID; - - uint32_t DisplayModel = model_ID; - if (family_ID == 0x06 || family_ID == 0x0F) - DisplayModel += extended_model_ID << 4; - - uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0); - - // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel) - if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE; - if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE; - if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE; - if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE; - if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE; - if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE; - if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE; - if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE; - if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE; - if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE; - if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE; - if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE; - if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE; - if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL; - if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL; - if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL; - if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL; - if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL; - if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL; - if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL; - if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL; - if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE; - if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE; - if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE; - if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE; - if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE; - if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM; - if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2; - if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2; - if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1; - - if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL; - if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING; - - return CPU::UNKNOWN; - } - - std::string stringOfCPUModel(CPU model) - { - switch (model) { - case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake"; - case CPU::CORE_ICE_LAKE : return "Core Ice Lake"; - case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake"; - case CPU::CORE_COMET_LAKE : return "Core Comet Lake"; - case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake"; - case CPU::CORE_KABY_LAKE : return "Core Kaby Lake"; - case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake"; - case CPU::CORE_SKY_LAKE : return "Core Sky Lake"; - case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill"; - case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing"; - case CPU::XEON_BROADWELL : return "Xeon Broadwell"; - case CPU::CORE_BROADWELL : return "Core Broadwell"; - case CPU::XEON_HASWELL : return "Xeon Haswell"; - case CPU::CORE_HASWELL : return "Core Haswell"; - case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge"; - case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge"; - case CPU::SANDY_BRIDGE : return "Sandy Bridge"; - case CPU::NEHALEM : return "Nehalem"; - case CPU::CORE2 : return "Core2"; - case CPU::CORE1 : return "Core"; - case CPU::ARM : return "Arm"; - case CPU::UNKNOWN : return "Unknown CPU"; - } - return "Unknown CPU (error)"; - } - -#if !defined(__ARM_NEON) - /* constants to access destination registers of CPUID instruction */ - static const int EAX = 0; - static const int EBX = 1; - static const int ECX = 2; - static const int EDX = 3; - - /* cpuid[eax=1].ecx */ - static const int CPU_FEATURE_BIT_SSE3 = 1 << 0; - static const int CPU_FEATURE_BIT_SSSE3 = 1 << 9; - static const int CPU_FEATURE_BIT_FMA3 = 1 << 12; - static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19; - static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20; - //static const int CPU_FEATURE_BIT_MOVBE = 1 << 22; - static const int CPU_FEATURE_BIT_POPCNT = 1 << 23; - //static const int CPU_FEATURE_BIT_XSAVE = 1 << 26; - static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27; - static const int CPU_FEATURE_BIT_AVX = 1 << 28; - static const int CPU_FEATURE_BIT_F16C = 1 << 29; - static const int CPU_FEATURE_BIT_RDRAND = 1 << 30; - - /* cpuid[eax=1].edx */ - static const int CPU_FEATURE_BIT_SSE = 1 << 25; - static const int CPU_FEATURE_BIT_SSE2 = 1 << 26; - - /* cpuid[eax=0x80000001].ecx */ - static const int CPU_FEATURE_BIT_LZCNT = 1 << 5; - - /* cpuid[eax=7,ecx=0].ebx */ - static const int CPU_FEATURE_BIT_BMI1 = 1 << 3; - static const int CPU_FEATURE_BIT_AVX2 = 1 << 5; - static const int CPU_FEATURE_BIT_BMI2 = 1 << 8; - static const int CPU_FEATURE_BIT_AVX512F = 1 << 16; // AVX512F (foundation) - static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17; // AVX512DQ (doubleword and quadword instructions) - static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26; // AVX512PF (prefetch gather/scatter instructions) - static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27; // AVX512ER (exponential and reciprocal instructions) - static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28; // AVX512CD (conflict detection instructions) - static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions) - static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions) - static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions) - - /* cpuid[eax=7,ecx=0].ecx */ - static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions) -#endif - -#if !defined(__ARM_NEON) - __noinline int64_t get_xcr0() - { - // https://github.com/opencv/opencv/blob/master/modules/core/src/system.cpp#L466 -#if defined (__WIN32__) && defined(_XCR_XFEATURE_ENABLED_MASK) - int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32 - xcr0 = _xgetbv(0); - return xcr0; -#else - int xcr0 = 0; - __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); - return xcr0; -#endif - } -#endif - - int getCPUFeatures() - { -#if defined(__ARM_NEON) - int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2; -#if defined(NEON_AVX2_EMULATION) - cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42; - cpu_features |= CPU_FEATURE_XMM_ENABLED; - cpu_features |= CPU_FEATURE_YMM_ENABLED; - cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C; - cpu_features |= CPU_FEATURE_POPCNT; - cpu_features |= CPU_FEATURE_AVX; - cpu_features |= CPU_FEATURE_AVX2; - cpu_features |= CPU_FEATURE_FMA3; - cpu_features |= CPU_FEATURE_LZCNT; - cpu_features |= CPU_FEATURE_BMI1; - cpu_features |= CPU_FEATURE_BMI2; - cpu_features |= CPU_FEATURE_NEON_2X; - - - -#endif - return cpu_features; - -#else - /* cache CPU features access */ - static int cpu_features = 0; - if (cpu_features) - return cpu_features; - - /* get number of CPUID leaves */ - int cpuid_leaf0[4]; - __cpuid(cpuid_leaf0, 0x00000000); - unsigned nIds = cpuid_leaf0[EAX]; - - /* get number of extended CPUID leaves */ - int cpuid_leafe[4]; - __cpuid(cpuid_leafe, 0x80000000); - unsigned nExIds = cpuid_leafe[EAX]; - - /* get CPUID leaves for EAX = 1,7, and 0x80000001 */ - int cpuid_leaf_1[4] = { 0,0,0,0 }; - int cpuid_leaf_7[4] = { 0,0,0,0 }; - int cpuid_leaf_e1[4] = { 0,0,0,0 }; - if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001); -#if _WIN32 -#if _MSC_VER && (_MSC_FULL_VER < 160040219) -#else - if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0); -#endif -#else - if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0); -#endif - if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001); - - /* detect if OS saves XMM, YMM, and ZMM states */ - bool xmm_enabled = true; - bool ymm_enabled = false; - bool zmm_enabled = false; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) { - int64_t xcr0 = get_xcr0(); - xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */ - ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */ - zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */ - } - if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED; - if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED; - if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED; - - if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE; - if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX; - - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3; - if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT; - if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1; - if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2; - - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL; - if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI; - - return cpu_features; -#endif - } - - std::string stringOfCPUFeatures(int features) - { - std::string str; - if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM "; - if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM "; - if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM "; - if (features & CPU_FEATURE_SSE ) str += "SSE "; - if (features & CPU_FEATURE_SSE2 ) str += "SSE2 "; - if (features & CPU_FEATURE_SSE3 ) str += "SSE3 "; - if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 "; - if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 "; - if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 "; - if (features & CPU_FEATURE_POPCNT) str += "POPCNT "; - if (features & CPU_FEATURE_AVX ) str += "AVX "; - if (features & CPU_FEATURE_F16C ) str += "F16C "; - if (features & CPU_FEATURE_RDRAND) str += "RDRAND "; - if (features & CPU_FEATURE_AVX2 ) str += "AVX2 "; - if (features & CPU_FEATURE_FMA3 ) str += "FMA3 "; - if (features & CPU_FEATURE_LZCNT ) str += "LZCNT "; - if (features & CPU_FEATURE_BMI1 ) str += "BMI1 "; - if (features & CPU_FEATURE_BMI2 ) str += "BMI2 "; - if (features & CPU_FEATURE_AVX512F) str += "AVX512F "; - if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ "; - if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF "; - if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER "; - if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD "; - if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW "; - if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL "; - if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA "; - if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI "; - if (features & CPU_FEATURE_NEON) str += "NEON "; - if (features & CPU_FEATURE_NEON_2X) str += "2xNEON "; - return str; - } - - std::string stringOfISA (int isa) - { - if (isa == SSE) return "SSE"; - if (isa == SSE2) return "SSE2"; - if (isa == SSE3) return "SSE3"; - if (isa == SSSE3) return "SSSE3"; - if (isa == SSE41) return "SSE4.1"; - if (isa == SSE42) return "SSE4.2"; - if (isa == AVX) return "AVX"; - if (isa == AVX2) return "AVX2"; - if (isa == AVX512KNL) return "AVX512KNL"; - if (isa == AVX512SKX) return "AVX512SKX"; - if (isa == NEON) return "NEON"; - if (isa == NEON_2X) return "2xNEON"; - return "UNKNOWN"; - } - - bool hasISA(int features, int isa) { - return (features & isa) == isa; - } - - std::string supportedTargetList (int features) - { - std::string v; - if (hasISA(features,SSE)) v += "SSE "; - if (hasISA(features,SSE2)) v += "SSE2 "; - if (hasISA(features,SSE3)) v += "SSE3 "; - if (hasISA(features,SSSE3)) v += "SSSE3 "; - if (hasISA(features,SSE41)) v += "SSE4.1 "; - if (hasISA(features,SSE42)) v += "SSE4.2 "; - if (hasISA(features,AVX)) v += "AVX "; - if (hasISA(features,AVXI)) v += "AVXI "; - if (hasISA(features,AVX2)) v += "AVX2 "; - if (hasISA(features,AVX512KNL)) v += "AVX512KNL "; - if (hasISA(features,AVX512SKX)) v += "AVX512SKX "; - if (hasISA(features,NEON)) v += "NEON "; - if (hasISA(features,NEON_2X)) v += "2xNEON "; - return v; - } -} - -//////////////////////////////////////////////////////////////////////////////// -/// Windows Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__WIN32__) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#include <psapi.h> - -namespace embree -{ - std::string getExecutableFileName() { - char filename[1024]; - if (!GetModuleFileName(nullptr, filename, sizeof(filename))) - return std::string(); - return std::string(filename); - } - - unsigned int getNumberOfLogicalThreads() - { - static int nThreads = -1; - if (nThreads != -1) return nThreads; - - typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); - typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); - HMODULE hlib = LoadLibrary("Kernel32"); - GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount"); - GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount"); - - if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount) - { - int groups = pGetActiveProcessorGroupCount(); - int totalProcessors = 0; - for (int i = 0; i < groups; i++) - totalProcessors += pGetActiveProcessorCount(i); - nThreads = totalProcessors; - } - else - { - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - nThreads = sysinfo.dwNumberOfProcessors; - } - assert(nThreads); - return nThreads; - } - - int getTerminalWidth() - { - HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE); - if (handle == INVALID_HANDLE_VALUE) return 80; - CONSOLE_SCREEN_BUFFER_INFO info; - memset(&info,0,sizeof(info)); - GetConsoleScreenBufferInfo(handle, &info); - return info.dwSize.X; - } - - double getSeconds() - { - LARGE_INTEGER freq, val; - QueryPerformanceFrequency(&freq); - QueryPerformanceCounter(&val); - return (double)val.QuadPart / (double)freq.QuadPart; - } - - void sleepSeconds(double t) { - Sleep(DWORD(1000.0*t)); - } - - size_t getVirtualMemoryBytes() - { - PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); - return (size_t)info.QuotaPeakPagedPoolUsage; - } - - size_t getResidentMemoryBytes() - { - PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); - return (size_t)info.WorkingSetSize; - } -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Linux Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__LINUX__) - -#include <stdio.h> -#include <unistd.h> - -namespace embree -{ - std::string getExecutableFileName() - { - std::string pid = "/proc/" + toString(getpid()) + "/exe"; - char buf[4096]; - memset(buf,0,sizeof(buf)); - if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1) - return std::string(); - return std::string(buf); - } - - size_t getVirtualMemoryBytes() - { - size_t virt, resident, shared; - std::ifstream buffer("/proc/self/statm"); - buffer >> virt >> resident >> shared; - return virt*sysconf(_SC_PAGE_SIZE); - } - - size_t getResidentMemoryBytes() - { - size_t virt, resident, shared; - std::ifstream buffer("/proc/self/statm"); - buffer >> virt >> resident >> shared; - return resident*sysconf(_SC_PAGE_SIZE); - } -} - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// FreeBSD Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined (__FreeBSD__) - -#include <sys/sysctl.h> - -namespace embree -{ - std::string getExecutableFileName() - { - const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; - char buf[4096]; - memset(buf,0,sizeof(buf)); - size_t len = sizeof(buf)-1; - if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1) - return std::string(); - return std::string(buf); - } - - size_t getVirtualMemoryBytes() { - return 0; - } - - size_t getResidentMemoryBytes() { - return 0; - } -} - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Mac OS X Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__MACOSX__) - -#include <mach-o/dyld.h> - -namespace embree -{ - std::string getExecutableFileName() - { - char buf[4096]; - uint32_t size = sizeof(buf); - if (_NSGetExecutablePath(buf, &size) != 0) - return std::string(); - return std::string(buf); - } - - size_t getVirtualMemoryBytes() { - return 0; - } - - size_t getResidentMemoryBytes() { - return 0; - } -} - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Unix Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__UNIX__) - -#include <unistd.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <pthread.h> - -namespace embree -{ - unsigned int getNumberOfLogicalThreads() - { - static int nThreads = -1; - if (nThreads != -1) return nThreads; - -#if defined(__MACOSX__) || defined(__ANDROID__) - nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container - assert(nThreads); -#else - cpu_set_t set; - if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) - nThreads = CPU_COUNT(&set); -#endif - - assert(nThreads); - return nThreads; - } - - int getTerminalWidth() - { - struct winsize info; - if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80; - return info.ws_col; - } - - double getSeconds() { - struct timeval tp; gettimeofday(&tp,nullptr); - return double(tp.tv_sec) + double(tp.tv_usec)/1E6; - } - - void sleepSeconds(double t) { - usleep(1000000.0*t); - } -} -#endif - diff --git a/thirdparty/embree-aarch64/common/sys/sysinfo.h b/thirdparty/embree-aarch64/common/sys/sysinfo.h deleted file mode 100644 index 8e313a59b3..0000000000 --- a/thirdparty/embree-aarch64/common/sys/sysinfo.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#define CACHELINE_SIZE 64 - -#if !defined(PAGE_SIZE) - #define PAGE_SIZE 4096 -#endif - -#define PAGE_SIZE_2M (2*1024*1024) -#define PAGE_SIZE_4K (4*1024) - -#include "platform.h" - -/* define isa namespace and ISA bitvector */ -#if defined (__AVX512VL__) -# define isa avx512skx -# define ISA AVX512SKX -# define ISA_STR "AVX512SKX" -#elif defined (__AVX512F__) -# define isa avx512knl -# define ISA AVX512KNL -# define ISA_STR "AVX512KNL" -#elif defined (__AVX2__) -# define isa avx2 -# define ISA AVX2 -# define ISA_STR "AVX2" -#elif defined(__AVXI__) -# define isa avxi -# define ISA AVXI -# define ISA_STR "AVXI" -#elif defined(__AVX__) -# define isa avx -# define ISA AVX -# define ISA_STR "AVX" -#elif defined (__SSE4_2__) -# define isa sse42 -# define ISA SSE42 -# define ISA_STR "SSE4.2" -//#elif defined (__SSE4_1__) // we demote this to SSE2, MacOSX code compiles with SSE41 by default with XCode 11 -//# define isa sse41 -//# define ISA SSE41 -//# define ISA_STR "SSE4.1" -//#elif defined(__SSSE3__) // we demote this to SSE2, MacOSX code compiles with SSSE3 by default with ICC -//# define isa ssse3 -//# define ISA SSSE3 -//# define ISA_STR "SSSE3" -//#elif defined(__SSE3__) // we demote this to SSE2, MacOSX code compiles with SSE3 by default with clang -//# define isa sse3 -//# define ISA SSE3 -//# define ISA_STR "SSE3" -#elif defined(__SSE2__) || defined(__SSE3__) || defined(__SSSE3__) -# define isa sse2 -# define ISA SSE2 -# define ISA_STR "SSE2" -#elif defined(__SSE__) -# define isa sse -# define ISA SSE -# define ISA_STR "SSE" -#elif defined(__ARM_NEON) -// NOTE(LTE): Use sse2 for `isa` for the compatibility at the moment. -#define isa sse2 -#define ISA NEON -#define ISA_STR "NEON" -#else -#error Unknown ISA -#endif - -namespace embree -{ - enum class CPU - { - XEON_ICE_LAKE, - CORE_ICE_LAKE, - CORE_TIGER_LAKE, - CORE_COMET_LAKE, - CORE_CANNON_LAKE, - CORE_KABY_LAKE, - XEON_SKY_LAKE, - CORE_SKY_LAKE, - XEON_PHI_KNIGHTS_MILL, - XEON_PHI_KNIGHTS_LANDING, - XEON_BROADWELL, - CORE_BROADWELL, - XEON_HASWELL, - CORE_HASWELL, - XEON_IVY_BRIDGE, - CORE_IVY_BRIDGE, - SANDY_BRIDGE, - NEHALEM, - CORE2, - CORE1, - ARM, - UNKNOWN, - }; - - /*! get the full path to the running executable */ - std::string getExecutableFileName(); - - /*! return platform name */ - std::string getPlatformName(); - - /*! get the full name of the compiler */ - std::string getCompilerName(); - - /*! return the name of the CPU */ - std::string getCPUVendor(); - - /*! get microprocessor model */ - CPU getCPUModel(); - - /*! converts CPU model into string */ - std::string stringOfCPUModel(CPU model); - - /*! CPU features */ - static const int CPU_FEATURE_SSE = 1 << 0; - static const int CPU_FEATURE_SSE2 = 1 << 1; - static const int CPU_FEATURE_SSE3 = 1 << 2; - static const int CPU_FEATURE_SSSE3 = 1 << 3; - static const int CPU_FEATURE_SSE41 = 1 << 4; - static const int CPU_FEATURE_SSE42 = 1 << 5; - static const int CPU_FEATURE_POPCNT = 1 << 6; - static const int CPU_FEATURE_AVX = 1 << 7; - static const int CPU_FEATURE_F16C = 1 << 8; - static const int CPU_FEATURE_RDRAND = 1 << 9; - static const int CPU_FEATURE_AVX2 = 1 << 10; - static const int CPU_FEATURE_FMA3 = 1 << 11; - static const int CPU_FEATURE_LZCNT = 1 << 12; - static const int CPU_FEATURE_BMI1 = 1 << 13; - static const int CPU_FEATURE_BMI2 = 1 << 14; - static const int CPU_FEATURE_AVX512F = 1 << 16; - static const int CPU_FEATURE_AVX512DQ = 1 << 17; - static const int CPU_FEATURE_AVX512PF = 1 << 18; - static const int CPU_FEATURE_AVX512ER = 1 << 19; - static const int CPU_FEATURE_AVX512CD = 1 << 20; - static const int CPU_FEATURE_AVX512BW = 1 << 21; - static const int CPU_FEATURE_AVX512VL = 1 << 22; - static const int CPU_FEATURE_AVX512IFMA = 1 << 23; - static const int CPU_FEATURE_AVX512VBMI = 1 << 24; - static const int CPU_FEATURE_XMM_ENABLED = 1 << 25; - static const int CPU_FEATURE_YMM_ENABLED = 1 << 26; - static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27; - static const int CPU_FEATURE_NEON = 1 << 28; - static const int CPU_FEATURE_NEON_2X = 1 << 29; - - /*! get CPU features */ - int getCPUFeatures(); - - /*! convert CPU features into a string */ - std::string stringOfCPUFeatures(int features); - - /*! creates a string of all supported targets that are supported */ - std::string supportedTargetList (int isa); - - /*! ISAs */ - static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED; - static const int SSE2 = SSE | CPU_FEATURE_SSE2; - static const int SSE3 = SSE2 | CPU_FEATURE_SSE3; - static const int SSSE3 = SSE3 | CPU_FEATURE_SSSE3; - static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41; - static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT; - static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED; - static const int AVXI = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND; - static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT; - static const int AVX512KNL = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512PF | CPU_FEATURE_AVX512ER | CPU_FEATURE_AVX512CD | CPU_FEATURE_ZMM_ENABLED; - static const int AVX512SKX = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED; - static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2; - static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2; - - /*! converts ISA bitvector into a string */ - std::string stringOfISA(int features); - - /*! return the number of logical threads of the system */ - unsigned int getNumberOfLogicalThreads(); - - /*! returns the size of the terminal window in characters */ - int getTerminalWidth(); - - /*! returns performance counter in seconds */ - double getSeconds(); - - /*! sleeps the specified number of seconds */ - void sleepSeconds(double t); - - /*! returns virtual address space occupied by process */ - size_t getVirtualMemoryBytes(); - - /*! returns resident memory required by process */ - size_t getResidentMemoryBytes(); -} diff --git a/thirdparty/embree-aarch64/common/sys/thread.cpp b/thirdparty/embree-aarch64/common/sys/thread.cpp deleted file mode 100644 index f9ea5b7d96..0000000000 --- a/thirdparty/embree-aarch64/common/sys/thread.cpp +++ /dev/null @@ -1,429 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "thread.h" -#include "sysinfo.h" -#include "string.h" - -#include <iostream> -#if defined(__ARM_NEON) -#include "../math/SSE2NEON.h" -#else -#include <xmmintrin.h> -#endif - -#if defined(PTHREADS_WIN32) -#pragma comment (lib, "pthreadVC.lib") -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Windows Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__WIN32__) - -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -namespace embree -{ - /*! set the affinity of a given thread */ - void setAffinity(HANDLE thread, ssize_t affinity) - { - typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); - typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); - typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY); - typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER); - HMODULE hlib = LoadLibrary("Kernel32"); - GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount"); - GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount"); - SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity"); - SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx"); - if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) - { - int groups = pGetActiveProcessorGroupCount(); - int totalProcessors = 0, group = 0, number = 0; - for (int i = 0; i<groups; i++) { - int processors = pGetActiveProcessorCount(i); - if (totalProcessors + processors > affinity) { - group = i; - number = (int)affinity - totalProcessors; - break; - } - totalProcessors += processors; - } - - GROUP_AFFINITY groupAffinity; - groupAffinity.Group = (WORD)group; - groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number); - groupAffinity.Reserved[0] = 0; - groupAffinity.Reserved[1] = 0; - groupAffinity.Reserved[2] = 0; - if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr)) - WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning - - PROCESSOR_NUMBER processorNumber; - processorNumber.Group = group; - processorNumber.Number = number; - processorNumber.Reserved = 0; - if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr)) - WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning - } - else - { - if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity))) - WARNING("SetThreadAffinityMask failed"); // on purpose only a warning - if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1) - WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning - } - } - - /*! set affinity of the calling thread */ - void setAffinity(ssize_t affinity) { - setAffinity(GetCurrentThread(), affinity); - } - - struct ThreadStartupData - { - public: - ThreadStartupData (thread_func f, void* arg) - : f(f), arg(arg) {} - public: - thread_func f; - void* arg; - }; - - DWORD WINAPI threadStartup(LPVOID ptr) - { - ThreadStartupData* parg = (ThreadStartupData*) ptr; - _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); - parg->f(parg->arg); - delete parg; - parg = nullptr; - return 0; - } - -#if !defined(PTHREADS_WIN32) - - /*! creates a hardware thread running on specific core */ - thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) - { - HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr); - if (thread == nullptr) FATAL("CreateThread failed"); - if (threadID >= 0) setAffinity(thread, threadID); - return thread_t(thread); - } - - /*! the thread calling this function gets yielded */ - void yield() { - SwitchToThread(); - } - - /*! waits until the given thread has terminated */ - void join(thread_t tid) { - WaitForSingleObject(HANDLE(tid), INFINITE); - CloseHandle(HANDLE(tid)); - } - - /*! creates thread local storage */ - tls_t createTls() { - return tls_t(size_t(TlsAlloc())); - } - - /*! set the thread local storage pointer */ - void setTls(tls_t tls, void* const ptr) { - TlsSetValue(DWORD(size_t(tls)), ptr); - } - - /*! return the thread local storage pointer */ - void* getTls(tls_t tls) { - return TlsGetValue(DWORD(size_t(tls))); - } - - /*! destroys thread local storage identifier */ - void destroyTls(tls_t tls) { - TlsFree(DWORD(size_t(tls))); - } -#endif -} - -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Linux Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__LINUX__) - -#include <fstream> -#include <sstream> -#include <algorithm> - -#if defined(__ANDROID__) -#include <pthread.h> -#endif - -namespace embree -{ - static MutexSys mutex; - static std::vector<size_t> threadIDs; - -#if !defined(__ANDROID__) // TODO(LTE): Implement for Android target - /* changes thread ID mapping such that we first fill up all thread on one core */ - size_t mapThreadID(size_t threadID) - { - Lock<MutexSys> lock(mutex); - - if (threadIDs.size() == 0) - { - /* parse thread/CPU topology */ - for (size_t cpuID=0;;cpuID++) - { - std::fstream fs; - std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list"); - fs.open (cpu.c_str(), std::fstream::in); - if (fs.fail()) break; - - int i; - while (fs >> i) - { - if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; })) - threadIDs.push_back(i); - if (fs.peek() == ',') - fs.ignore(); - } - fs.close(); - } - -#if 0 - for (size_t i=0;i<threadIDs.size();i++) - std::cout << i << " -> " << threadIDs[i] << std::endl; -#endif - - /* verify the mapping and do not use it if the mapping has errors */ - for (size_t i=0;i<threadIDs.size();i++) { - for (size_t j=0;j<threadIDs.size();j++) { - if (i != j && threadIDs[i] == threadIDs[j]) { - threadIDs.clear(); - } - } - } - } - - /* re-map threadIDs if mapping is available */ - size_t ID = threadID; - if (threadID < threadIDs.size()) - ID = threadIDs[threadID]; - - /* find correct thread to affinitize to */ - cpu_set_t set; - if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) - { - for (int i=0, j=0; i<CPU_SETSIZE; i++) - { - if (!CPU_ISSET(i,&set)) continue; - - if (j == ID) { - ID = i; - break; - } - j++; - } - } - - return ID; - } -#endif - - /*! set affinity of the calling thread */ - void setAffinity(ssize_t affinity) - { -#if defined(__ANDROID__) - // TODO(LTE): Implement -#else - cpu_set_t cset; - CPU_ZERO(&cset); - size_t threadID = mapThreadID(affinity); - CPU_SET(threadID, &cset); - - pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); -#endif - } -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// FreeBSD Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__FreeBSD__) - -#include <pthread_np.h> - -namespace embree -{ - /*! set affinity of the calling thread */ - void setAffinity(ssize_t affinity) - { - cpuset_t cset; - CPU_ZERO(&cset); - CPU_SET(affinity, &cset); - - pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); - } -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// MacOSX Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__MACOSX__) - -#include <mach/thread_act.h> -#include <mach/thread_policy.h> -#include <mach/mach_init.h> - -namespace embree -{ - /*! set affinity of the calling thread */ - void setAffinity(ssize_t affinity) - { - thread_affinity_policy ap; - ap.affinity_tag = affinity; - if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) - WARNING("setting thread affinity failed"); // on purpose only a warning - } -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Unix Platform -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__UNIX__) || defined(PTHREADS_WIN32) - -#include <pthread.h> -#include <sched.h> - -#if defined(__USE_NUMA__) -#include <numa.h> -#endif - -namespace embree -{ - struct ThreadStartupData - { - public: - ThreadStartupData (thread_func f, void* arg, int affinity) - : f(f), arg(arg), affinity(affinity) {} - public: - thread_func f; - void* arg; - ssize_t affinity; - }; - - static void* threadStartup(ThreadStartupData* parg) - { - _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); - - /*! Mac OS X does not support setting affinity at thread creation time */ -#if defined(__MACOSX__) - if (parg->affinity >= 0) - setAffinity(parg->affinity); -#endif - - parg->f(parg->arg); - delete parg; - parg = nullptr; - return nullptr; - } - - /*! creates a hardware thread running on specific core */ - thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) - { - /* set stack size */ - pthread_attr_t attr; - pthread_attr_init(&attr); - if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size); - - /* create thread */ - pthread_t* tid = new pthread_t; - if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) { - pthread_attr_destroy(&attr); - delete tid; - FATAL("pthread_create failed"); - } - pthread_attr_destroy(&attr); - - /* set affinity */ -#if defined(__LINUX__) && !defined(__ANDROID__) - if (threadID >= 0) { - cpu_set_t cset; - CPU_ZERO(&cset); - threadID = mapThreadID(threadID); - CPU_SET(threadID, &cset); - pthread_setaffinity_np(*tid, sizeof(cset), &cset); - } -#elif defined(__FreeBSD__) - if (threadID >= 0) { - cpuset_t cset; - CPU_ZERO(&cset); - CPU_SET(threadID, &cset); - pthread_setaffinity_np(*tid, sizeof(cset), &cset); - } -#endif - - return thread_t(tid); - } - - /*! the thread calling this function gets yielded */ - void yield() { - sched_yield(); - } - - /*! waits until the given thread has terminated */ - void join(thread_t tid) { - if (pthread_join(*(pthread_t*)tid, nullptr) != 0) - FATAL("pthread_join failed"); - delete (pthread_t*)tid; - } - - /*! creates thread local storage */ - tls_t createTls() - { - pthread_key_t* key = new pthread_key_t; - if (pthread_key_create(key,nullptr) != 0) { - delete key; - FATAL("pthread_key_create failed"); - } - - return tls_t(key); - } - - /*! return the thread local storage pointer */ - void* getTls(tls_t tls) - { - assert(tls); - return pthread_getspecific(*(pthread_key_t*)tls); - } - - /*! set the thread local storage pointer */ - void setTls(tls_t tls, void* const ptr) - { - assert(tls); - if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0) - FATAL("pthread_setspecific failed"); - } - - /*! destroys thread local storage identifier */ - void destroyTls(tls_t tls) - { - assert(tls); - if (pthread_key_delete(*(pthread_key_t*)tls) != 0) - FATAL("pthread_key_delete failed"); - delete (pthread_key_t*)tls; - } -} - -#endif diff --git a/thirdparty/embree-aarch64/common/sys/thread.h b/thirdparty/embree-aarch64/common/sys/thread.h deleted file mode 100644 index 45da6e6a70..0000000000 --- a/thirdparty/embree-aarch64/common/sys/thread.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "platform.h" -#include "mutex.h" -#include "alloc.h" -#include "vector.h" -#include <vector> - -namespace embree -{ - /*! type for thread */ - typedef struct opaque_thread_t* thread_t; - - /*! signature of thread start function */ - typedef void (*thread_func)(void*); - - /*! creates a hardware thread running on specific logical thread */ - thread_t createThread(thread_func f, void* arg, size_t stack_size = 0, ssize_t threadID = -1); - - /*! set affinity of the calling thread */ - void setAffinity(ssize_t affinity); - - /*! the thread calling this function gets yielded */ - void yield(); - - /*! waits until the given thread has terminated */ - void join(thread_t tid); - - /*! type for handle to thread local storage */ - typedef struct opaque_tls_t* tls_t; - - /*! creates thread local storage */ - tls_t createTls(); - - /*! set the thread local storage pointer */ - void setTls(tls_t tls, void* const ptr); - - /*! return the thread local storage pointer */ - void* getTls(tls_t tls); - - /*! destroys thread local storage identifier */ - void destroyTls(tls_t tls); -} diff --git a/thirdparty/embree-aarch64/common/sys/vector.h b/thirdparty/embree-aarch64/common/sys/vector.h deleted file mode 100644 index e41794de7c..0000000000 --- a/thirdparty/embree-aarch64/common/sys/vector.h +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "alloc.h" -#include <algorithm> - -namespace embree -{ - template<typename T, typename allocator> - class vector_t - { - public: - typedef T value_type; - typedef T* iterator; - typedef const T* const_iterator; - - __forceinline vector_t () - : size_active(0), size_alloced(0), items(nullptr) {} - - __forceinline explicit vector_t (size_t sz) - : size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); } - - template<typename M> - __forceinline explicit vector_t (M alloc, size_t sz) - : alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); } - - __forceinline ~vector_t() { - clear(); - } - - __forceinline vector_t (const vector_t& other) - { - size_active = other.size_active; - size_alloced = other.size_alloced; - items = alloc.allocate(size_alloced); - for (size_t i=0; i<size_active; i++) - ::new (&items[i]) value_type(other.items[i]); - } - - __forceinline vector_t (vector_t&& other) - : alloc(std::move(other.alloc)) - { - size_active = other.size_active; other.size_active = 0; - size_alloced = other.size_alloced; other.size_alloced = 0; - items = other.items; other.items = nullptr; - } - - __forceinline vector_t& operator=(const vector_t& other) - { - resize(other.size_active); - for (size_t i=0; i<size_active; i++) - items[i] = value_type(other.items[i]); - return *this; - } - - __forceinline vector_t& operator=(vector_t&& other) - { - clear(); - alloc = std::move(other.alloc); - size_active = other.size_active; other.size_active = 0; - size_alloced = other.size_alloced; other.size_alloced = 0; - items = other.items; other.items = nullptr; - return *this; - } - - /********************** Iterators ****************************/ - - __forceinline iterator begin() { return items; }; - __forceinline const_iterator begin() const { return items; }; - - __forceinline iterator end () { return items+size_active; }; - __forceinline const_iterator end () const { return items+size_active; }; - - - /********************** Capacity ****************************/ - - __forceinline bool empty () const { return size_active == 0; } - __forceinline size_t size () const { return size_active; } - __forceinline size_t capacity () const { return size_alloced; } - - - __forceinline void resize(size_t new_size) { - internal_resize(new_size,internal_grow_size(new_size)); - } - - __forceinline void reserve(size_t new_alloced) - { - /* do nothing if container already large enough */ - if (new_alloced <= size_alloced) - return; - - /* resize exact otherwise */ - internal_resize(size_active,new_alloced); - } - - __forceinline void shrink_to_fit() { - internal_resize(size_active,size_active); - } - - /******************** Element access **************************/ - - __forceinline T& operator[](size_t i) { assert(i < size_active); return items[i]; } - __forceinline const T& operator[](size_t i) const { assert(i < size_active); return items[i]; } - - __forceinline T& at(size_t i) { assert(i < size_active); return items[i]; } - __forceinline const T& at(size_t i) const { assert(i < size_active); return items[i]; } - - __forceinline T& front() const { assert(size_active > 0); return items[0]; }; - __forceinline T& back () const { assert(size_active > 0); return items[size_active-1]; }; - - __forceinline T* data() { return items; }; - __forceinline const T* data() const { return items; }; - - - /******************** Modifiers **************************/ - - __forceinline void push_back(const T& nt) - { - const T v = nt; // need local copy as input reference could point to this vector - internal_resize(size_active,internal_grow_size(size_active+1)); - ::new (&items[size_active++]) T(v); - } - - __forceinline void pop_back() - { - assert(!empty()); - size_active--; - alloc.destroy(&items[size_active]); - } - - __forceinline void clear() - { - /* destroy elements */ - for (size_t i=0; i<size_active; i++) - alloc.destroy(&items[i]); - - /* free memory */ - alloc.deallocate(items,size_alloced); - items = nullptr; - size_active = size_alloced = 0; - } - - /******************** Comparisons **************************/ - - friend bool operator== (const vector_t& a, const vector_t& b) - { - if (a.size() != b.size()) return false; - for (size_t i=0; i<a.size(); i++) - if (a[i] != b[i]) - return false; - return true; - } - - friend bool operator!= (const vector_t& a, const vector_t& b) { - return !(a==b); - } - - private: - - __forceinline void internal_resize_init(size_t new_active) - { - assert(size_active == 0); - assert(size_alloced == 0); - assert(items == nullptr); - if (new_active == 0) return; - items = alloc.allocate(new_active); - for (size_t i=0; i<new_active; i++) ::new (&items[i]) T(); - size_active = new_active; - size_alloced = new_active; - } - - __forceinline void internal_resize(size_t new_active, size_t new_alloced) - { - assert(new_active <= new_alloced); - - /* destroy elements */ - if (new_active < size_active) - { - for (size_t i=new_active; i<size_active; i++) - alloc.destroy(&items[i]); - size_active = new_active; - } - - /* only reallocate if necessary */ - if (new_alloced == size_alloced) { - for (size_t i=size_active; i<new_active; i++) ::new (&items[i]) T; - size_active = new_active; - return; - } - - /* reallocate and copy items */ - T* old_items = items; - items = alloc.allocate(new_alloced); - for (size_t i=0; i<size_active; i++) { - ::new (&items[i]) T(std::move(old_items[i])); - alloc.destroy(&old_items[i]); - } - - for (size_t i=size_active; i<new_active; i++) { - ::new (&items[i]) T; - } - - alloc.deallocate(old_items,size_alloced); - size_active = new_active; - size_alloced = new_alloced; - } - - __forceinline size_t internal_grow_size(size_t new_alloced) - { - /* do nothing if container already large enough */ - if (new_alloced <= size_alloced) - return size_alloced; - - /* resize to next power of 2 otherwise */ - size_t new_size_alloced = size_alloced; - while (new_size_alloced < new_alloced) { - new_size_alloced = std::max(size_t(1),2*new_size_alloced); - } - return new_size_alloced; - } - - private: - allocator alloc; - size_t size_active; // number of valid items - size_t size_alloced; // number of items allocated - T* items; // data array - }; - - /*! vector class that performs standard allocations */ - template<typename T> - using vector = vector_t<T,std::allocator<T>>; - - /*! vector class that performs aligned allocations */ - template<typename T> - using avector = vector_t<T,aligned_allocator<T,std::alignment_of<T>::value> >; - - /*! vector class that performs OS allocations */ - template<typename T> - using ovector = vector_t<T,os_allocator<T> >; -} |