author     jfons <joan.fonssanchez@gmail.com>       2021-04-20 18:38:09 +0200
committer  jfons <joan.fonssanchez@gmail.com>       2021-04-23 15:57:28 +0200
commit     34b3e8f9e2ae076990ecf3b2827eff759ba2abf9 (patch)
tree       854a526a5ba2d6128e44d995d1bc138cf84ee722 /thirdparty/embree-aarch64/common/tasking
parent     eeccab26c5641409092547e02ad11e6253ac1b87 (diff)
Add Embree-aarch64 thirdparty library
Diffstat (limited to 'thirdparty/embree-aarch64/common/tasking')
 thirdparty/embree-aarch64/common/tasking/taskscheduler.h           |  17 +
 thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h        |  49 +
 thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp | 426 +
 thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h   | 386 +
 thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h        |  46 +
 thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h        |  67 +
6 files changed, 991 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/common/tasking/taskscheduler.h b/thirdparty/embree-aarch64/common/tasking/taskscheduler.h new file mode 100644 index 0000000000..9940e068d0 --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskscheduler.h @@ -0,0 +1,17 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#if defined(TASKING_INTERNAL) +# include "taskschedulerinternal.h" +#elif defined(TASKING_GCD) && defined(BUILD_IOS) +# include "taskschedulergcd.h" +#elif defined(TASKING_TBB) +# include "taskschedulertbb.h" +#elif defined(TASKING_PPL) +# include "taskschedulerppl.h" +#else +# error "no tasking system enabled" +#endif + diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h b/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h new file mode 100644 index 0000000000..d31f8bb478 --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h @@ -0,0 +1,49 @@ +#pragma once + +#include "../sys/platform.h" +#include "../sys/alloc.h" +#include "../sys/barrier.h" +#include "../sys/thread.h" +#include "../sys/mutex.h" +#include "../sys/condition.h" +#include "../sys/ref.h" + +#include <dispatch/dispatch.h> + +namespace embree +{ + struct TaskScheduler + { + /*! initializes the task scheduler */ + static void create(size_t numThreads, bool set_affinity, bool start_threads); + + /*! destroys the task scheduler again */ + static void destroy() {} + + /* returns the ID of the current thread */ + static __forceinline size_t threadID() + { + return threadIndex(); + } + + /* returns the index (0..threadCount-1) of the current thread */ + static __forceinline size_t threadIndex() + { + currentThreadIndex = (currentThreadIndex + 1) % GCDNumThreads; + return currentThreadIndex; + } + + /* returns the total number of threads */ + static __forceinline size_t threadCount() + { + return GCDNumThreads; + } + + private: + static size_t GCDNumThreads; + static size_t currentThreadIndex; + + }; + +}; + diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp new file mode 100644 index 0000000000..ebf656d1a0 --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp @@ -0,0 +1,426 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "taskschedulerinternal.h" +#include "../math/math.h" +#include "../sys/sysinfo.h" +#include <algorithm> + +namespace embree +{ + RTC_NAMESPACE_BEGIN + + static MutexSys g_mutex; + size_t TaskScheduler::g_numThreads = 0; + __thread TaskScheduler* TaskScheduler::g_instance = nullptr; + std::vector<Ref<TaskScheduler>> g_instance_vector; + __thread TaskScheduler::Thread* TaskScheduler::thread_local_thread = nullptr; + TaskScheduler::ThreadPool* TaskScheduler::threadPool = nullptr; + + template<typename Predicate, typename Body> + __forceinline void TaskScheduler::steal_loop(Thread& thread, const Predicate& pred, const Body& body) + { + while (true) + { + /*! some rounds that yield */ + for (size_t i=0; i<32; i++) + { + /*! some spinning rounds */ + const size_t threadCount = thread.threadCount(); + for (size_t j=0; j<1024; j+=threadCount) + { + if (!pred()) return; + if (thread.scheduler->steal_from_other_threads(thread)) { + i=j=0; + body(); + } + } + yield(); + } + } + } + + /*! 
run this task */ + void TaskScheduler::Task::run_internal (Thread& thread) // FIXME: avoid as many dll_exports as possible + { + /* try to run if not already stolen */ + if (try_switch_state(INITIALIZED,DONE)) + { + Task* prevTask = thread.task; + thread.task = this; + // -- GODOT start -- + // try { + // if (thread.scheduler->cancellingException == nullptr) + closure->execute(); + // } catch (...) { + // if (thread.scheduler->cancellingException == nullptr) + // thread.scheduler->cancellingException = std::current_exception(); + // } + // -- GODOT end -- + thread.task = prevTask; + add_dependencies(-1); + } + + /* steal until all dependencies have completed */ + steal_loop(thread, + [&] () { return dependencies>0; }, + [&] () { while (thread.tasks.execute_local_internal(thread,this)); }); + + /* now signal our parent task that we are finished */ + if (parent) + parent->add_dependencies(-1); + } + + /*! run this task */ + dll_export void TaskScheduler::Task::run (Thread& thread) { + run_internal(thread); + } + + bool TaskScheduler::TaskQueue::execute_local_internal(Thread& thread, Task* parent) + { + /* stop if we run out of local tasks or reach the waiting task */ + if (right == 0 || &tasks[right-1] == parent) + return false; + + /* execute task */ + size_t oldRight = right; + tasks[right-1].run_internal(thread); + if (right != oldRight) { + THROW_RUNTIME_ERROR("you have to wait for spawned subtasks"); + } + + /* pop task and closure from stack */ + right--; + if (tasks[right].stackPtr != size_t(-1)) + stackPtr = tasks[right].stackPtr; + + /* also move left pointer */ + if (left >= right) left.store(right.load()); + + return right != 0; + } + + dll_export bool TaskScheduler::TaskQueue::execute_local(Thread& thread, Task* parent) { + return execute_local_internal(thread,parent); + } + + bool TaskScheduler::TaskQueue::steal(Thread& thread) + { + size_t l = left; + size_t r = right; + if (l < r) + { + l = left++; + if (l >= r) + return false; + } + else + return false; + + if (!tasks[l].try_steal(thread.tasks.tasks[thread.tasks.right])) + return false; + + thread.tasks.right++; + return true; + } + + /* we steal from the left */ + size_t TaskScheduler::TaskQueue::getTaskSizeAtLeft() + { + if (left >= right) return 0; + return tasks[left].N; + } + + void threadPoolFunction(std::pair<TaskScheduler::ThreadPool*,size_t>* pair) + { + TaskScheduler::ThreadPool* pool = pair->first; + size_t threadIndex = pair->second; + delete pair; + pool->thread_loop(threadIndex); + } + + TaskScheduler::ThreadPool::ThreadPool(bool set_affinity) + : numThreads(0), numThreadsRunning(0), set_affinity(set_affinity), running(false) {} + + dll_export void TaskScheduler::ThreadPool::startThreads() + { + if (running) return; + setNumThreads(numThreads,true); + } + + void TaskScheduler::ThreadPool::setNumThreads(size_t newNumThreads, bool startThreads) + { + Lock<MutexSys> lock(g_mutex); + assert(newNumThreads); + newNumThreads = min(newNumThreads, (size_t) getNumberOfLogicalThreads()); + + // We are observing a few % gain by increasing number threads by 2 on aarch64. 
+#if defined(__aarch64__) && defined(BUILD_IOS) + numThreads = newNumThreads*2; +#else + numThreads = newNumThreads; +#endif + numThreads = newNumThreads; + if (!startThreads && !running) return; + running = true; + size_t numThreadsActive = numThreadsRunning; + + mutex.lock(); + numThreadsRunning = newNumThreads; + mutex.unlock(); + condition.notify_all(); + + /* start new threads */ + for (size_t t=numThreadsActive; t<numThreads; t++) + { + if (t == 0) continue; + auto pair = new std::pair<TaskScheduler::ThreadPool*,size_t>(this,t); + threads.push_back(createThread((thread_func)threadPoolFunction,pair,4*1024*1024,set_affinity ? t : -1)); + } + + /* stop some threads if we reduce the number of threads */ + for (ssize_t t=numThreadsActive-1; t>=ssize_t(numThreadsRunning); t--) { + if (t == 0) continue; + embree::join(threads.back()); + threads.pop_back(); + } + } + + TaskScheduler::ThreadPool::~ThreadPool() + { + /* leave all taskschedulers */ + mutex.lock(); + numThreadsRunning = 0; + mutex.unlock(); + condition.notify_all(); + + /* wait for threads to terminate */ + for (size_t i=0; i<threads.size(); i++) + embree::join(threads[i]); + } + + dll_export void TaskScheduler::ThreadPool::add(const Ref<TaskScheduler>& scheduler) + { + mutex.lock(); + schedulers.push_back(scheduler); + mutex.unlock(); + condition.notify_all(); + } + + dll_export void TaskScheduler::ThreadPool::remove(const Ref<TaskScheduler>& scheduler) + { + Lock<MutexSys> lock(mutex); + for (std::list<Ref<TaskScheduler> >::iterator it = schedulers.begin(); it != schedulers.end(); it++) { + if (scheduler == *it) { + schedulers.erase(it); + return; + } + } + } + + void TaskScheduler::ThreadPool::thread_loop(size_t globalThreadIndex) + { + while (globalThreadIndex < numThreadsRunning) + { + Ref<TaskScheduler> scheduler = NULL; + ssize_t threadIndex = -1; + { + Lock<MutexSys> lock(mutex); + condition.wait(mutex, [&] () { return globalThreadIndex >= numThreadsRunning || !schedulers.empty(); }); + if (globalThreadIndex >= numThreadsRunning) break; + scheduler = schedulers.front(); + threadIndex = scheduler->allocThreadIndex(); + } + scheduler->thread_loop(threadIndex); + } + } + + TaskScheduler::TaskScheduler() + : threadCounter(0), anyTasksRunning(0), hasRootTask(false) + { + threadLocal.resize(2*getNumberOfLogicalThreads()); // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x. 
+ for (size_t i=0; i<threadLocal.size(); i++) + threadLocal[i].store(nullptr); + } + + TaskScheduler::~TaskScheduler() + { + assert(threadCounter == 0); + } + + dll_export size_t TaskScheduler::threadID() + { + Thread* thread = TaskScheduler::thread(); + if (thread) return thread->threadIndex; + else return 0; + } + + dll_export size_t TaskScheduler::threadIndex() + { + Thread* thread = TaskScheduler::thread(); + if (thread) return thread->threadIndex; + else return 0; + } + + dll_export size_t TaskScheduler::threadCount() { + return threadPool->size(); + } + + dll_export TaskScheduler* TaskScheduler::instance() + { + if (g_instance == NULL) { + Lock<MutexSys> lock(g_mutex); + g_instance = new TaskScheduler; + g_instance_vector.push_back(g_instance); + } + return g_instance; + } + + void TaskScheduler::create(size_t numThreads, bool set_affinity, bool start_threads) + { + if (!threadPool) threadPool = new TaskScheduler::ThreadPool(set_affinity); + threadPool->setNumThreads(numThreads,start_threads); + } + + void TaskScheduler::destroy() { + delete threadPool; threadPool = nullptr; + } + + dll_export ssize_t TaskScheduler::allocThreadIndex() + { + size_t threadIndex = threadCounter++; + assert(threadIndex < threadLocal.size()); + return threadIndex; + } + + void TaskScheduler::join() + { + mutex.lock(); + size_t threadIndex = allocThreadIndex(); + condition.wait(mutex, [&] () { return hasRootTask.load(); }); + mutex.unlock(); + // -- GODOT start -- + // std::exception_ptr except = thread_loop(threadIndex); + // if (except != nullptr) std::rethrow_exception(except); + thread_loop(threadIndex); + // -- GODOT end -- + } + + void TaskScheduler::reset() { + hasRootTask = false; + } + + void TaskScheduler::wait_for_threads(size_t threadCount) + { + while (threadCounter < threadCount-1) + pause_cpu(); + } + + dll_export TaskScheduler::Thread* TaskScheduler::thread() { + return thread_local_thread; + } + + dll_export TaskScheduler::Thread* TaskScheduler::swapThread(Thread* thread) + { + Thread* old = thread_local_thread; + thread_local_thread = thread; + return old; + } + + dll_export bool TaskScheduler::wait() + { + Thread* thread = TaskScheduler::thread(); + if (thread == nullptr) return true; + while (thread->tasks.execute_local_internal(*thread,thread->task)) {}; + return thread->scheduler->cancellingException == nullptr; + } + +// -- GODOT start -- +// std::exception_ptr TaskScheduler::thread_loop(size_t threadIndex) + void TaskScheduler::thread_loop(size_t threadIndex) +// -- GODOT end -- + { + /* allocate thread structure */ + std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation + Thread& thread = *mthread; + threadLocal[threadIndex].store(&thread); + Thread* oldThread = swapThread(&thread); + + /* main thread loop */ + while (anyTasksRunning) + { + steal_loop(thread, + [&] () { return anyTasksRunning > 0; }, + [&] () { + anyTasksRunning++; + while (thread.tasks.execute_local_internal(thread,nullptr)); + anyTasksRunning--; + }); + } + threadLocal[threadIndex].store(nullptr); + swapThread(oldThread); + + /* remember exception to throw */ + // -- GODOT start -- + // std::exception_ptr except = nullptr; + // if (cancellingException != nullptr) except = cancellingException; + // -- GODOT end -- + /* wait for all threads to terminate */ + threadCounter--; +#if defined(__WIN32__) + size_t loopIndex = 1; +#endif +#define LOOP_YIELD_THRESHOLD (4096) + while (threadCounter > 0) { +#if defined(__WIN32__) + if ((loopIndex % LOOP_YIELD_THRESHOLD) == 0) + 
yield(); + else + _mm_pause(); + loopIndex++; +#else + yield(); +#endif + } + // -- GODOT start -- + // return except; + return; + // -- GODOT end -- + } + + bool TaskScheduler::steal_from_other_threads(Thread& thread) + { + const size_t threadIndex = thread.threadIndex; + const size_t threadCount = this->threadCounter; + + for (size_t i=1; i<threadCount; i++) + { + pause_cpu(32); + size_t otherThreadIndex = threadIndex+i; + if (otherThreadIndex >= threadCount) otherThreadIndex -= threadCount; + + Thread* othread = threadLocal[otherThreadIndex].load(); + if (!othread) + continue; + + if (othread->tasks.steal(thread)) + return true; + } + + return false; + } + + dll_export void TaskScheduler::startThreads() { + threadPool->startThreads(); + } + + dll_export void TaskScheduler::addScheduler(const Ref<TaskScheduler>& scheduler) { + threadPool->add(scheduler); + } + + dll_export void TaskScheduler::removeScheduler(const Ref<TaskScheduler>& scheduler) { + threadPool->remove(scheduler); + } + + RTC_NAMESPACE_END +} diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h new file mode 100644 index 0000000000..8bd70b2b8c --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h @@ -0,0 +1,386 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/platform.h" +#include "../sys/alloc.h" +#include "../sys/barrier.h" +#include "../sys/thread.h" +#include "../sys/mutex.h" +#include "../sys/condition.h" +#include "../sys/ref.h" +#include "../sys/atomic.h" +#include "../math/range.h" +#include "../../include/embree3/rtcore.h" + +#include <list> + +namespace embree +{ + + /* The tasking system exports some symbols to be used by the tutorials. Thus we + hide is also in the API namespace when requested. */ + RTC_NAMESPACE_BEGIN + + struct TaskScheduler : public RefCount + { + ALIGNED_STRUCT_(64); + friend class Device; + + static const size_t TASK_STACK_SIZE = 4*1024; //!< task structure stack + static const size_t CLOSURE_STACK_SIZE = 512*1024; //!< stack for task closures + + struct Thread; + + /*! virtual interface for all tasks */ + struct TaskFunction { + virtual void execute() = 0; + }; + + /*! builds a task interface from a closure */ + template<typename Closure> + struct ClosureTaskFunction : public TaskFunction + { + Closure closure; + __forceinline ClosureTaskFunction (const Closure& closure) : closure(closure) {} + void execute() { closure(); }; + }; + + struct __aligned(64) Task + { + /*! states a task can be in */ + enum { DONE, INITIALIZED }; + + /*! switch from one state to another */ + __forceinline void switch_state(int from, int to) + { + __memory_barrier(); + MAYBE_UNUSED bool success = state.compare_exchange_strong(from,to); + assert(success); + } + + /*! try to switch from one state to another */ + __forceinline bool try_switch_state(int from, int to) { + __memory_barrier(); + return state.compare_exchange_strong(from,to); + } + + /*! increment/decrement dependency counter */ + void add_dependencies(int n) { + dependencies+=n; + } + + /*! initialize all tasks to DONE state by default */ + __forceinline Task() + : state(DONE) {} + + /*! 
construction of new task */ + __forceinline Task (TaskFunction* closure, Task* parent, size_t stackPtr, size_t N) + : dependencies(1), stealable(true), closure(closure), parent(parent), stackPtr(stackPtr), N(N) + { + if (parent) parent->add_dependencies(+1); + switch_state(DONE,INITIALIZED); + } + + /*! construction of stolen task, stealing thread will decrement initial dependency */ + __forceinline Task (TaskFunction* closure, Task* parent) + : dependencies(1), stealable(false), closure(closure), parent(parent), stackPtr(-1), N(1) + { + switch_state(DONE,INITIALIZED); + } + + /*! try to steal this task */ + bool try_steal(Task& child) + { + if (!stealable) return false; + if (!try_switch_state(INITIALIZED,DONE)) return false; + new (&child) Task(closure, this); + return true; + } + + /*! run this task */ + dll_export void run(Thread& thread); + + void run_internal(Thread& thread); + + public: + std::atomic<int> state; //!< state this task is in + std::atomic<int> dependencies; //!< dependencies to wait for + std::atomic<bool> stealable; //!< true if task can be stolen + TaskFunction* closure; //!< the closure to execute + Task* parent; //!< parent task to signal when we are finished + size_t stackPtr; //!< stack location where closure is stored + size_t N; //!< approximative size of task + }; + + struct TaskQueue + { + TaskQueue () + : left(0), right(0), stackPtr(0) {} + + __forceinline void* alloc(size_t bytes, size_t align = 64) + { + size_t ofs = bytes + ((align - stackPtr) & (align-1)); + if (stackPtr + ofs > CLOSURE_STACK_SIZE) + // -- GODOT start -- + // throw std::runtime_error("closure stack overflow"); + abort(); + // -- GODOT end -- + stackPtr += ofs; + return &stack[stackPtr-bytes]; + } + + template<typename Closure> + __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure) + { + if (right >= TASK_STACK_SIZE) + // -- GODOT start -- + // throw std::runtime_error("task stack overflow"); + abort(); + // -- GODOT end -- + + /* allocate new task on right side of stack */ + size_t oldStackPtr = stackPtr; + TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure); + /* gcc 8 or later fails to compile without explicit .load() */ + new (&(tasks[right.load()])) Task(func,thread.task,oldStackPtr,size); + right++; + + /* also move left pointer */ + if (left >= right-1) left = right-1; + } + + dll_export bool execute_local(Thread& thread, Task* parent); + bool execute_local_internal(Thread& thread, Task* parent); + bool steal(Thread& thread); + size_t getTaskSizeAtLeft(); + + bool empty() { return right == 0; } + + public: + + /* task stack */ + Task tasks[TASK_STACK_SIZE]; + __aligned(64) std::atomic<size_t> left; //!< threads steal from left + __aligned(64) std::atomic<size_t> right; //!< new tasks are added to the right + + /* closure stack */ + __aligned(64) char stack[CLOSURE_STACK_SIZE]; + size_t stackPtr; + }; + + /*! thread local structure for each thread */ + struct Thread + { + ALIGNED_STRUCT_(64); + + Thread (size_t threadIndex, const Ref<TaskScheduler>& scheduler) + : threadIndex(threadIndex), task(nullptr), scheduler(scheduler) {} + + __forceinline size_t threadCount() { + return scheduler->threadCounter; + } + + size_t threadIndex; //!< ID of this thread + TaskQueue tasks; //!< local task queue + Task* task; //!< current active task + Ref<TaskScheduler> scheduler; //!< pointer to task scheduler + }; + + /*! 
pool of worker threads */ + struct ThreadPool + { + ThreadPool (bool set_affinity); + ~ThreadPool (); + + /*! starts the threads */ + dll_export void startThreads(); + + /*! sets number of threads to use */ + void setNumThreads(size_t numThreads, bool startThreads = false); + + /*! adds a task scheduler object for scheduling */ + dll_export void add(const Ref<TaskScheduler>& scheduler); + + /*! remove the task scheduler object again */ + dll_export void remove(const Ref<TaskScheduler>& scheduler); + + /*! returns number of threads of the thread pool */ + size_t size() const { return numThreads; } + + /*! main loop for all threads */ + void thread_loop(size_t threadIndex); + + private: + std::atomic<size_t> numThreads; + std::atomic<size_t> numThreadsRunning; + bool set_affinity; + std::atomic<bool> running; + std::vector<thread_t> threads; + + private: + MutexSys mutex; + ConditionSys condition; + std::list<Ref<TaskScheduler> > schedulers; + }; + + TaskScheduler (); + ~TaskScheduler (); + + /*! initializes the task scheduler */ + static void create(size_t numThreads, bool set_affinity, bool start_threads); + + /*! destroys the task scheduler again */ + static void destroy(); + + /*! lets new worker threads join the tasking system */ + void join(); + void reset(); + + /*! let a worker thread allocate a thread index */ + dll_export ssize_t allocThreadIndex(); + + /*! wait for some number of threads available (threadCount includes main thread) */ + void wait_for_threads(size_t threadCount); + + /*! thread loop for all worker threads */ + // -- GODOT start -- + // std::exception_ptr thread_loop(size_t threadIndex); + void thread_loop(size_t threadIndex); + // -- GODOT end -- + + /*! steals a task from a different thread */ + bool steal_from_other_threads(Thread& thread); + + template<typename Predicate, typename Body> + static void steal_loop(Thread& thread, const Predicate& pred, const Body& body); + + /* spawn a new task at the top of the threads task stack */ + template<typename Closure> + void spawn_root(const Closure& closure, size_t size = 1, bool useThreadPool = true) + { + if (useThreadPool) startThreads(); + + size_t threadIndex = allocThreadIndex(); + std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation + Thread& thread = *mthread; + assert(threadLocal[threadIndex].load() == nullptr); + threadLocal[threadIndex] = &thread; + Thread* oldThread = swapThread(&thread); + thread.tasks.push_right(thread,size,closure); + { + Lock<MutexSys> lock(mutex); + anyTasksRunning++; + hasRootTask = true; + condition.notify_all(); + } + + if (useThreadPool) addScheduler(this); + + while (thread.tasks.execute_local(thread,nullptr)); + anyTasksRunning--; + if (useThreadPool) removeScheduler(this); + + threadLocal[threadIndex] = nullptr; + swapThread(oldThread); + + /* remember exception to throw */ + std::exception_ptr except = nullptr; + if (cancellingException != nullptr) except = cancellingException; + + /* wait for all threads to terminate */ + threadCounter--; + while (threadCounter > 0) yield(); + cancellingException = nullptr; + + /* re-throw proper exception */ + if (except != nullptr) + std::rethrow_exception(except); + } + + /* spawn a new task at the top of the threads task stack */ + template<typename Closure> + static __forceinline void spawn(size_t size, const Closure& closure) + { + Thread* thread = TaskScheduler::thread(); + if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure); + else 
instance()->spawn_root(closure,size); + } + + /* spawn a new task at the top of the threads task stack */ + template<typename Closure> + static __forceinline void spawn(const Closure& closure) { + spawn(1,closure); + } + + /* spawn a new task set */ + template<typename Index, typename Closure> + static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure) + { + spawn(end-begin, [=]() + { + if (end-begin <= blockSize) { + return closure(range<Index>(begin,end)); + } + const Index center = (begin+end)/2; + spawn(begin,center,blockSize,closure); + spawn(center,end ,blockSize,closure); + wait(); + }); + } + + /* work on spawned subtasks and wait until all have finished */ + dll_export static bool wait(); + + /* returns the ID of the current thread */ + dll_export static size_t threadID(); + + /* returns the index (0..threadCount-1) of the current thread */ + dll_export static size_t threadIndex(); + + /* returns the total number of threads */ + dll_export static size_t threadCount(); + + private: + + /* returns the thread local task list of this worker thread */ + dll_export static Thread* thread(); + + /* sets the thread local task list of this worker thread */ + dll_export static Thread* swapThread(Thread* thread); + + /*! returns the taskscheduler object to be used by the master thread */ + dll_export static TaskScheduler* instance(); + + /*! starts the threads */ + dll_export static void startThreads(); + + /*! adds a task scheduler object for scheduling */ + dll_export static void addScheduler(const Ref<TaskScheduler>& scheduler); + + /*! remove the task scheduler object again */ + dll_export static void removeScheduler(const Ref<TaskScheduler>& scheduler); + + private: + std::vector<atomic<Thread*>> threadLocal; + std::atomic<size_t> threadCounter; + std::atomic<size_t> anyTasksRunning; + std::atomic<bool> hasRootTask; + std::exception_ptr cancellingException; + MutexSys mutex; + ConditionSys condition; + + private: + static size_t g_numThreads; + static __thread TaskScheduler* g_instance; + static __thread Thread* thread_local_thread; + static ThreadPool* threadPool; + }; + + RTC_NAMESPACE_END + +#if defined(RTC_NAMESPACE) + using RTC_NAMESPACE::TaskScheduler; +#endif +} diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h b/thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h new file mode 100644 index 0000000000..776f98cdac --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h @@ -0,0 +1,46 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/platform.h" +#include "../sys/alloc.h" +#include "../sys/barrier.h" +#include "../sys/thread.h" +#include "../sys/mutex.h" +#include "../sys/condition.h" +#include "../sys/ref.h" + +#if !defined(__WIN32__) +#error PPL tasking system only available under windows +#endif + +#include <ppl.h> + +namespace embree +{ + struct TaskScheduler + { + /*! initializes the task scheduler */ + static void create(size_t numThreads, bool set_affinity, bool start_threads); + + /*! destroys the task scheduler again */ + static void destroy(); + + /* returns the ID of the current thread */ + static __forceinline size_t threadID() { + return GetCurrentThreadId(); + } + + /* returns the index (0..threadCount-1) of the current thread */ + /* FIXME: threadIndex is NOT supported by PPL! 
*/ + static __forceinline size_t threadIndex() { + return 0; + } + + /* returns the total number of threads */ + static __forceinline size_t threadCount() { + return GetMaximumProcessorCount(ALL_PROCESSOR_GROUPS) + 1; + } + }; +}; diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h b/thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h new file mode 100644 index 0000000000..98dba26871 --- /dev/null +++ b/thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h @@ -0,0 +1,67 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../sys/platform.h" +#include "../sys/alloc.h" +#include "../sys/barrier.h" +#include "../sys/thread.h" +#include "../sys/mutex.h" +#include "../sys/condition.h" +#include "../sys/ref.h" + +#if defined(__WIN32__) +# define NOMINMAX +#endif + +// We need to define these to avoid implicit linkage against +// tbb_debug.lib under Windows. When removing these lines debug build +// under Windows fails. +#define __TBB_NO_IMPLICIT_LINKAGE 1 +#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 +#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 +#define TBB_PREVIEW_ISOLATED_TASK_GROUP 1 +#include "tbb/tbb.h" +#include "tbb/parallel_sort.h" + +namespace embree +{ + struct TaskScheduler + { + /*! initializes the task scheduler */ + static void create(size_t numThreads, bool set_affinity, bool start_threads); + + /*! destroys the task scheduler again */ + static void destroy(); + + /* returns the ID of the current thread */ + static __forceinline size_t threadID() + { + return threadIndex(); + } + + /* returns the index (0..threadCount-1) of the current thread */ + static __forceinline size_t threadIndex() + { +#if TBB_INTERFACE_VERSION >= 9100 + return tbb::this_task_arena::current_thread_index(); +#elif TBB_INTERFACE_VERSION >= 9000 + return tbb::task_arena::current_thread_index(); +#else + return 0; +#endif + } + + /* returns the total number of threads */ + static __forceinline size_t threadCount() { +#if TBB_INTERFACE_VERSION >= 9100 + return tbb::this_task_arena::max_concurrency(); +#else + return tbb::task_scheduler_init::default_num_threads(); +#endif + } + + }; + +}; |
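
For orientation, below is a minimal, hypothetical usage sketch of the scheduler API this commit adds, using only entry points declared in `taskschedulerinternal.h` (`create`, `spawn`, `wait`, `threadCount`, `destroy`). It assumes a build where `taskscheduler.h` resolves to the internal work-stealing backend (`TASKING_INTERNAL`); the include path and the work done inside the tasks are illustrative and not part of this commit or of Embree itself.

```cpp
// Hypothetical usage sketch (not part of this commit), assuming TASKING_INTERNAL
// so that taskscheduler.h dispatches to taskschedulerinternal.h.
#include "thirdparty/embree-aarch64/common/tasking/taskscheduler.h"

#include <atomic>
#include <cstdio>

int main()
{
    // Bring up the worker pool: 4 threads, no affinity pinning, start them now.
    embree::TaskScheduler::create(4, /*set_affinity=*/false, /*start_threads=*/true);

    std::atomic<int> counter{0};

    // spawn() from a thread without a scheduler context routes through a root
    // task, which runs the whole task graph to completion before returning.
    embree::TaskScheduler::spawn([&]() {
        // Inside a task, child tasks are pushed onto this thread's local queue
        // and may be stolen by idle workers.
        for (int i = 0; i < 8; i++)
            embree::TaskScheduler::spawn([&]() { counter++; });
        // Help execute the spawned subtasks until all dependencies are done.
        embree::TaskScheduler::wait();
    });

    std::printf("ran %d subtasks on up to %zu threads\n",
                counter.load(), embree::TaskScheduler::threadCount());

    embree::TaskScheduler::destroy();
    return 0;
}
```

The same `create`/`threadIndex`/`threadCount` surface is what the GCD, TBB, and PPL headers in this diff provide, so rendering code written against `TaskScheduler` stays backend-agnostic.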