diff options
Diffstat (limited to 'thirdparty/bullet/LinearMath/TaskScheduler')
4 files changed, 0 insertions, 1667 deletions
diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp deleted file mode 100644 index 5f1115c402..0000000000 --- a/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp +++ /dev/null @@ -1,792 +0,0 @@ - -#include "LinearMath/btMinMax.h" -#include "LinearMath/btAlignedObjectArray.h" -#include "LinearMath/btThreads.h" -#include "LinearMath/btQuickprof.h" -#include <stdio.h> -#include <algorithm> - -#if BT_THREADSAFE - -#include "btThreadSupportInterface.h" - -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN - -#include <windows.h> - -#endif - -typedef unsigned long long btU64; -static const int kCacheLineSize = 64; - -void btSpinPause() -{ -#if defined(_WIN32) - YieldProcessor(); -#endif -} - -struct WorkerThreadStatus -{ - enum Type - { - kInvalid, - kWaitingForWork, - kWorking, - kSleeping, - }; -}; - -ATTRIBUTE_ALIGNED64(class) -WorkerThreadDirectives -{ - static const int kMaxThreadCount = BT_MAX_THREAD_COUNT; - // directives for all worker threads packed into a single cacheline - char m_threadDirs[kMaxThreadCount]; - -public: - enum Type - { - kInvalid, - kGoToSleep, // go to sleep - kStayAwakeButIdle, // wait for not checking job queue - kScanForJobs, // actively scan job queue for jobs - }; - WorkerThreadDirectives() - { - for (int i = 0; i < kMaxThreadCount; ++i) - { - m_threadDirs[i] = 0; - } - } - - Type getDirective(int threadId) - { - btAssert(threadId < kMaxThreadCount); - return static_cast<Type>(m_threadDirs[threadId]); - } - - void setDirectiveByRange(int threadBegin, int threadEnd, Type dir) - { - btAssert(threadBegin < threadEnd); - btAssert(threadEnd <= kMaxThreadCount); - char dirChar = static_cast<char>(dir); - for (int i = threadBegin; i < threadEnd; ++i) - { - m_threadDirs[i] = dirChar; - } - } -}; - -class JobQueue; - -ATTRIBUTE_ALIGNED64(struct) -ThreadLocalStorage -{ - int m_threadId; - WorkerThreadStatus::Type m_status; - int m_numJobsFinished; - btSpinMutex m_mutex; - btScalar m_sumResult; - WorkerThreadDirectives* m_directive; - JobQueue* m_queue; - btClock* m_clock; - unsigned int m_cooldownTime; -}; - -struct IJob -{ - virtual void executeJob(int threadId) = 0; -}; - -class ParallelForJob : public IJob -{ - const btIParallelForBody* m_body; - int m_begin; - int m_end; - -public: - ParallelForJob(int iBegin, int iEnd, const btIParallelForBody& body) - { - m_body = &body; - m_begin = iBegin; - m_end = iEnd; - } - virtual void executeJob(int threadId) BT_OVERRIDE - { - BT_PROFILE("executeJob"); - - // call the functor body to do the work - m_body->forLoop(m_begin, m_end); - } -}; - -class ParallelSumJob : public IJob -{ - const btIParallelSumBody* m_body; - ThreadLocalStorage* m_threadLocalStoreArray; - int m_begin; - int m_end; - -public: - ParallelSumJob(int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalStorage* tls) - { - m_body = &body; - m_threadLocalStoreArray = tls; - m_begin = iBegin; - m_end = iEnd; - } - virtual void executeJob(int threadId) BT_OVERRIDE - { - BT_PROFILE("executeJob"); - - // call the functor body to do the work - btScalar val = m_body->sumLoop(m_begin, m_end); -#if BT_PARALLEL_SUM_DETERMINISTISM - // by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision) - const float TRUNC_SCALE = float(1 << 19); - val = floor(val * TRUNC_SCALE + 0.5f) / TRUNC_SCALE; // truncate some bits -#endif - m_threadLocalStoreArray[threadId].m_sumResult += val; - } -}; - -ATTRIBUTE_ALIGNED64(class) -JobQueue -{ - btThreadSupportInterface* m_threadSupport; - btCriticalSection* m_queueLock; - btSpinMutex m_mutex; - - btAlignedObjectArray<IJob*> m_jobQueue; - char* m_jobMem; - int m_jobMemSize; - bool m_queueIsEmpty; - int m_tailIndex; - int m_headIndex; - int m_allocSize; - bool m_useSpinMutex; - btAlignedObjectArray<JobQueue*> m_neighborContexts; - char m_cachePadding[kCacheLineSize]; // prevent false sharing - - void freeJobMem() - { - if (m_jobMem) - { - // free old - btAlignedFree(m_jobMem); - m_jobMem = NULL; - } - } - void resizeJobMem(int newSize) - { - if (newSize > m_jobMemSize) - { - freeJobMem(); - m_jobMem = static_cast<char*>(btAlignedAlloc(newSize, kCacheLineSize)); - m_jobMemSize = newSize; - } - } - -public: - JobQueue() - { - m_jobMem = NULL; - m_jobMemSize = 0; - m_threadSupport = NULL; - m_queueLock = NULL; - m_headIndex = 0; - m_tailIndex = 0; - m_useSpinMutex = false; - } - ~JobQueue() - { - exit(); - } - void exit() - { - freeJobMem(); - if (m_queueLock && m_threadSupport) - { - m_threadSupport->deleteCriticalSection(m_queueLock); - m_queueLock = NULL; - m_threadSupport = 0; - } - } - - void init(btThreadSupportInterface * threadSup, btAlignedObjectArray<JobQueue> * contextArray) - { - m_threadSupport = threadSup; - if (threadSup) - { - m_queueLock = m_threadSupport->createCriticalSection(); - } - setupJobStealing(contextArray, contextArray->size()); - } - void setupJobStealing(btAlignedObjectArray<JobQueue> * contextArray, int numActiveContexts) - { - btAlignedObjectArray<JobQueue>& contexts = *contextArray; - int selfIndex = 0; - for (int i = 0; i < contexts.size(); ++i) - { - if (this == &contexts[i]) - { - selfIndex = i; - break; - } - } - int numNeighbors = btMin(2, contexts.size() - 1); - int neighborOffsets[] = {-1, 1, -2, 2, -3, 3}; - int numOffsets = sizeof(neighborOffsets) / sizeof(neighborOffsets[0]); - m_neighborContexts.reserve(numNeighbors); - m_neighborContexts.resizeNoInitialize(0); - for (int i = 0; i < numOffsets && m_neighborContexts.size() < numNeighbors; i++) - { - int neighborIndex = selfIndex + neighborOffsets[i]; - if (neighborIndex >= 0 && neighborIndex < numActiveContexts) - { - m_neighborContexts.push_back(&contexts[neighborIndex]); - } - } - } - - bool isQueueEmpty() const { return m_queueIsEmpty; } - void lockQueue() - { - if (m_useSpinMutex) - { - m_mutex.lock(); - } - else - { - m_queueLock->lock(); - } - } - void unlockQueue() - { - if (m_useSpinMutex) - { - m_mutex.unlock(); - } - else - { - m_queueLock->unlock(); - } - } - void clearQueue(int jobCount, int jobSize) - { - lockQueue(); - m_headIndex = 0; - m_tailIndex = 0; - m_allocSize = 0; - m_queueIsEmpty = true; - int jobBufSize = jobSize * jobCount; - // make sure we have enough memory allocated to store jobs - if (jobBufSize > m_jobMemSize) - { - resizeJobMem(jobBufSize); - } - // make sure job queue is big enough - if (jobCount > m_jobQueue.capacity()) - { - m_jobQueue.reserve(jobCount); - } - unlockQueue(); - m_jobQueue.resizeNoInitialize(0); - } - void* allocJobMem(int jobSize) - { - btAssert(m_jobMemSize >= (m_allocSize + jobSize)); - void* jobMem = &m_jobMem[m_allocSize]; - m_allocSize += jobSize; - return jobMem; - } - void submitJob(IJob * job) - { - btAssert(reinterpret_cast<char*>(job) >= &m_jobMem[0] && reinterpret_cast<char*>(job) < &m_jobMem[0] + m_allocSize); - m_jobQueue.push_back(job); - lockQueue(); - m_tailIndex++; - m_queueIsEmpty = false; - unlockQueue(); - } - IJob* consumeJobFromOwnQueue() - { - if (m_queueIsEmpty) - { - // lock free path. even if this is taken erroneously it isn't harmful - return NULL; - } - IJob* job = NULL; - lockQueue(); - if (!m_queueIsEmpty) - { - job = m_jobQueue[m_headIndex++]; - btAssert(reinterpret_cast<char*>(job) >= &m_jobMem[0] && reinterpret_cast<char*>(job) < &m_jobMem[0] + m_allocSize); - if (m_headIndex == m_tailIndex) - { - m_queueIsEmpty = true; - } - } - unlockQueue(); - return job; - } - IJob* consumeJob() - { - if (IJob* job = consumeJobFromOwnQueue()) - { - return job; - } - // own queue is empty, try to steal from neighbor - for (int i = 0; i < m_neighborContexts.size(); ++i) - { - JobQueue* otherContext = m_neighborContexts[i]; - if (IJob* job = otherContext->consumeJobFromOwnQueue()) - { - return job; - } - } - return NULL; - } -}; - -static void WorkerThreadFunc(void* userPtr) -{ - BT_PROFILE("WorkerThreadFunc"); - ThreadLocalStorage* localStorage = (ThreadLocalStorage*)userPtr; - JobQueue* jobQueue = localStorage->m_queue; - - bool shouldSleep = false; - int threadId = localStorage->m_threadId; - while (!shouldSleep) - { - // do work - localStorage->m_mutex.lock(); - while (IJob* job = jobQueue->consumeJob()) - { - localStorage->m_status = WorkerThreadStatus::kWorking; - job->executeJob(threadId); - localStorage->m_numJobsFinished++; - } - localStorage->m_status = WorkerThreadStatus::kWaitingForWork; - localStorage->m_mutex.unlock(); - btU64 clockStart = localStorage->m_clock->getTimeMicroseconds(); - // while queue is empty, - while (jobQueue->isQueueEmpty()) - { - // todo: spin wait a bit to avoid hammering the empty queue - btSpinPause(); - if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kGoToSleep) - { - shouldSleep = true; - break; - } - // if jobs are incoming, - if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kScanForJobs) - { - clockStart = localStorage->m_clock->getTimeMicroseconds(); // reset clock - } - else - { - for (int i = 0; i < 50; ++i) - { - btSpinPause(); - btSpinPause(); - btSpinPause(); - btSpinPause(); - if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kScanForJobs || !jobQueue->isQueueEmpty()) - { - break; - } - } - // if no jobs incoming and queue has been empty for the cooldown time, sleep - btU64 timeElapsed = localStorage->m_clock->getTimeMicroseconds() - clockStart; - if (timeElapsed > localStorage->m_cooldownTime) - { - shouldSleep = true; - break; - } - } - } - } - { - BT_PROFILE("sleep"); - // go sleep - localStorage->m_mutex.lock(); - localStorage->m_status = WorkerThreadStatus::kSleeping; - localStorage->m_mutex.unlock(); - } -} - -class btTaskSchedulerDefault : public btITaskScheduler -{ - btThreadSupportInterface* m_threadSupport; - WorkerThreadDirectives* m_workerDirective; - btAlignedObjectArray<JobQueue> m_jobQueues; - btAlignedObjectArray<JobQueue*> m_perThreadJobQueues; - btAlignedObjectArray<ThreadLocalStorage> m_threadLocalStorage; - btSpinMutex m_antiNestingLock; // prevent nested parallel-for - btClock m_clock; - int m_numThreads; - int m_numWorkerThreads; - int m_numActiveJobQueues; - int m_maxNumThreads; - int m_numJobs; - static const int kFirstWorkerThreadId = 1; - -public: - btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport") - { - m_threadSupport = NULL; - m_workerDirective = NULL; - } - - virtual ~btTaskSchedulerDefault() - { - waitForWorkersToSleep(); - - for (int i = 0; i < m_jobQueues.size(); ++i) - { - m_jobQueues[i].exit(); - } - - if (m_threadSupport) - { - delete m_threadSupport; - m_threadSupport = NULL; - } - if (m_workerDirective) - { - btAlignedFree(m_workerDirective); - m_workerDirective = NULL; - } - } - - void init() - { - btThreadSupportInterface::ConstructionInfo constructionInfo("TaskScheduler", WorkerThreadFunc); - m_threadSupport = btThreadSupportInterface::create(constructionInfo); - m_workerDirective = static_cast<WorkerThreadDirectives*>(btAlignedAlloc(sizeof(*m_workerDirective), 64)); - - m_numWorkerThreads = m_threadSupport->getNumWorkerThreads(); - m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1; - m_numThreads = m_maxNumThreads; - // ideal to have one job queue for each physical processor (except for the main thread which needs no queue) - int numThreadsPerQueue = m_threadSupport->getLogicalToPhysicalCoreRatio(); - int numJobQueues = (numThreadsPerQueue == 1) ? (m_maxNumThreads - 1) : (m_maxNumThreads / numThreadsPerQueue); - m_jobQueues.resize(numJobQueues); - m_numActiveJobQueues = numJobQueues; - for (int i = 0; i < m_jobQueues.size(); ++i) - { - m_jobQueues[i].init(m_threadSupport, &m_jobQueues); - } - m_perThreadJobQueues.resize(m_numThreads); - for (int i = 0; i < m_numThreads; i++) - { - JobQueue* jq = NULL; - // only worker threads get a job queue - if (i > 0) - { - if (numThreadsPerQueue == 1) - { - // one queue per worker thread - jq = &m_jobQueues[i - kFirstWorkerThreadId]; - } - else - { - // 2 threads share each queue - jq = &m_jobQueues[i / numThreadsPerQueue]; - } - } - m_perThreadJobQueues[i] = jq; - } - m_threadLocalStorage.resize(m_numThreads); - for (int i = 0; i < m_numThreads; i++) - { - ThreadLocalStorage& storage = m_threadLocalStorage[i]; - storage.m_threadId = i; - storage.m_directive = m_workerDirective; - storage.m_status = WorkerThreadStatus::kSleeping; - storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do - storage.m_clock = &m_clock; - storage.m_queue = m_perThreadJobQueues[i]; - } - setWorkerDirectives(WorkerThreadDirectives::kGoToSleep); // no work for them yet - setNumThreads(m_threadSupport->getCacheFriendlyNumThreads()); - } - - void setWorkerDirectives(WorkerThreadDirectives::Type dir) - { - m_workerDirective->setDirectiveByRange(kFirstWorkerThreadId, m_numThreads, dir); - } - - virtual int getMaxNumThreads() const BT_OVERRIDE - { - return m_maxNumThreads; - } - - virtual int getNumThreads() const BT_OVERRIDE - { - return m_numThreads; - } - - virtual void setNumThreads(int numThreads) BT_OVERRIDE - { - m_numThreads = btMax(btMin(numThreads, int(m_maxNumThreads)), 1); - m_numWorkerThreads = m_numThreads - 1; - m_numActiveJobQueues = 0; - // if there is at least 1 worker, - if (m_numWorkerThreads > 0) - { - // re-setup job stealing between queues to avoid attempting to steal from an inactive job queue - JobQueue* lastActiveContext = m_perThreadJobQueues[m_numThreads - 1]; - int iLastActiveContext = lastActiveContext - &m_jobQueues[0]; - m_numActiveJobQueues = iLastActiveContext + 1; - for (int i = 0; i < m_jobQueues.size(); ++i) - { - m_jobQueues[i].setupJobStealing(&m_jobQueues, m_numActiveJobQueues); - } - } - m_workerDirective->setDirectiveByRange(m_numThreads, BT_MAX_THREAD_COUNT, WorkerThreadDirectives::kGoToSleep); - } - - void waitJobs() - { - BT_PROFILE("waitJobs"); - // have the main thread work until the job queues are empty - int numMainThreadJobsFinished = 0; - for (int i = 0; i < m_numActiveJobQueues; ++i) - { - while (IJob* job = m_jobQueues[i].consumeJob()) - { - job->executeJob(0); - numMainThreadJobsFinished++; - } - } - - // done with jobs for now, tell workers to rest (but not sleep) - setWorkerDirectives(WorkerThreadDirectives::kStayAwakeButIdle); - - btU64 clockStart = m_clock.getTimeMicroseconds(); - // wait for workers to finish any jobs in progress - while (true) - { - int numWorkerJobsFinished = 0; - for (int iThread = kFirstWorkerThreadId; iThread < m_numThreads; ++iThread) - { - ThreadLocalStorage* storage = &m_threadLocalStorage[iThread]; - storage->m_mutex.lock(); - numWorkerJobsFinished += storage->m_numJobsFinished; - storage->m_mutex.unlock(); - } - if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs) - { - break; - } - btU64 timeElapsed = m_clock.getTimeMicroseconds() - clockStart; - btAssert(timeElapsed < 1000); - if (timeElapsed > 100000) - { - break; - } - btSpinPause(); - } - } - - void wakeWorkers(int numWorkersToWake) - { - BT_PROFILE("wakeWorkers"); - btAssert(m_workerDirective->getDirective(1) == WorkerThreadDirectives::kScanForJobs); - int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads); - int numActiveWorkers = 0; - for (int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker) - { - // note this count of active workers is not necessarily totally reliable, because a worker thread could be - // just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare. - ThreadLocalStorage& storage = m_threadLocalStorage[kFirstWorkerThreadId + iWorker]; - if (storage.m_status != WorkerThreadStatus::kSleeping) - { - numActiveWorkers++; - } - } - for (int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker) - { - ThreadLocalStorage& storage = m_threadLocalStorage[kFirstWorkerThreadId + iWorker]; - if (storage.m_status == WorkerThreadStatus::kSleeping) - { - m_threadSupport->runTask(iWorker, &storage); - numActiveWorkers++; - } - } - } - - void waitForWorkersToSleep() - { - BT_PROFILE("waitForWorkersToSleep"); - setWorkerDirectives(WorkerThreadDirectives::kGoToSleep); - m_threadSupport->waitForAllTasks(); - for (int i = kFirstWorkerThreadId; i < m_numThreads; i++) - { - ThreadLocalStorage& storage = m_threadLocalStorage[i]; - btAssert(storage.m_status == WorkerThreadStatus::kSleeping); - } - } - - virtual void sleepWorkerThreadsHint() BT_OVERRIDE - { - BT_PROFILE("sleepWorkerThreadsHint"); - // hint the task scheduler that we may not be using these threads for a little while - setWorkerDirectives(WorkerThreadDirectives::kGoToSleep); - } - - void prepareWorkerThreads() - { - for (int i = kFirstWorkerThreadId; i < m_numThreads; ++i) - { - ThreadLocalStorage& storage = m_threadLocalStorage[i]; - storage.m_mutex.lock(); - storage.m_numJobsFinished = 0; - storage.m_mutex.unlock(); - } - setWorkerDirectives(WorkerThreadDirectives::kScanForJobs); - } - - virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE - { - BT_PROFILE("parallelFor_ThreadSupport"); - btAssert(iEnd >= iBegin); - btAssert(grainSize >= 1); - int iterationCount = iEnd - iBegin; - if (iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock()) - { - typedef ParallelForJob JobType; - int jobCount = (iterationCount + grainSize - 1) / grainSize; - m_numJobs = jobCount; - btAssert(jobCount >= 2); // need more than one job for multithreading - int jobSize = sizeof(JobType); - - for (int i = 0; i < m_numActiveJobQueues; ++i) - { - m_jobQueues[i].clearQueue(jobCount, jobSize); - } - // prepare worker threads for incoming work - prepareWorkerThreads(); - // submit all of the jobs - int iJob = 0; - int iThread = kFirstWorkerThreadId; // first worker thread - for (int i = iBegin; i < iEnd; i += grainSize) - { - btAssert(iJob < jobCount); - int iE = btMin(i + grainSize, iEnd); - JobQueue* jq = m_perThreadJobQueues[iThread]; - btAssert(jq); - btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues); - void* jobMem = jq->allocJobMem(jobSize); - JobType* job = new (jobMem) ParallelForJob(i, iE, body); // placement new - jq->submitJob(job); - iJob++; - iThread++; - if (iThread >= m_numThreads) - { - iThread = kFirstWorkerThreadId; // first worker thread - } - } - wakeWorkers(jobCount - 1); - - // put the main thread to work on emptying the job queue and then wait for all workers to finish - waitJobs(); - m_antiNestingLock.unlock(); - } - else - { - BT_PROFILE("parallelFor_mainThread"); - // just run on main thread - body.forLoop(iBegin, iEnd); - } - } - virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE - { - BT_PROFILE("parallelSum_ThreadSupport"); - btAssert(iEnd >= iBegin); - btAssert(grainSize >= 1); - int iterationCount = iEnd - iBegin; - if (iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock()) - { - typedef ParallelSumJob JobType; - int jobCount = (iterationCount + grainSize - 1) / grainSize; - m_numJobs = jobCount; - btAssert(jobCount >= 2); // need more than one job for multithreading - int jobSize = sizeof(JobType); - for (int i = 0; i < m_numActiveJobQueues; ++i) - { - m_jobQueues[i].clearQueue(jobCount, jobSize); - } - - // initialize summation - for (int iThread = 0; iThread < m_numThreads; ++iThread) - { - m_threadLocalStorage[iThread].m_sumResult = btScalar(0); - } - - // prepare worker threads for incoming work - prepareWorkerThreads(); - // submit all of the jobs - int iJob = 0; - int iThread = kFirstWorkerThreadId; // first worker thread - for (int i = iBegin; i < iEnd; i += grainSize) - { - btAssert(iJob < jobCount); - int iE = btMin(i + grainSize, iEnd); - JobQueue* jq = m_perThreadJobQueues[iThread]; - btAssert(jq); - btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues); - void* jobMem = jq->allocJobMem(jobSize); - JobType* job = new (jobMem) ParallelSumJob(i, iE, body, &m_threadLocalStorage[0]); // placement new - jq->submitJob(job); - iJob++; - iThread++; - if (iThread >= m_numThreads) - { - iThread = kFirstWorkerThreadId; // first worker thread - } - } - wakeWorkers(jobCount - 1); - - // put the main thread to work on emptying the job queue and then wait for all workers to finish - waitJobs(); - - // add up all the thread sums - btScalar sum = btScalar(0); - for (int iThread = 0; iThread < m_numThreads; ++iThread) - { - sum += m_threadLocalStorage[iThread].m_sumResult; - } - m_antiNestingLock.unlock(); - return sum; - } - else - { - BT_PROFILE("parallelSum_mainThread"); - // just run on main thread - return body.sumLoop(iBegin, iEnd); - } - } -}; - -btITaskScheduler* btCreateDefaultTaskScheduler() -{ - btTaskSchedulerDefault* ts = new btTaskSchedulerDefault(); - ts->init(); - return ts; -} - -#else // #if BT_THREADSAFE - -btITaskScheduler* btCreateDefaultTaskScheduler() -{ - return NULL; -} - -#endif // #else // #if BT_THREADSAFE diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h deleted file mode 100644 index 1fe49335a1..0000000000 --- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef BT_THREAD_SUPPORT_INTERFACE_H -#define BT_THREAD_SUPPORT_INTERFACE_H - -class btCriticalSection -{ -public: - btCriticalSection() {} - virtual ~btCriticalSection() {} - - virtual void lock() = 0; - virtual void unlock() = 0; -}; - -class btThreadSupportInterface -{ -public: - virtual ~btThreadSupportInterface() {} - - virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1) - virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache - virtual int getLogicalToPhysicalCoreRatio() const = 0; // the number of logical processors per physical processor (usually 1 or 2) - virtual void runTask(int threadIndex, void* userData) = 0; - virtual void waitForAllTasks() = 0; - - virtual btCriticalSection* createCriticalSection() = 0; - virtual void deleteCriticalSection(btCriticalSection* criticalSection) = 0; - - typedef void (*ThreadFunc)(void* userPtr); - - struct ConstructionInfo - { - ConstructionInfo(const char* uniqueName, - ThreadFunc userThreadFunc, - int threadStackSize = 65535) - : m_uniqueName(uniqueName), - m_userThreadFunc(userThreadFunc), - m_threadStackSize(threadStackSize) - { - } - - const char* m_uniqueName; - ThreadFunc m_userThreadFunc; - int m_threadStackSize; - }; - - static btThreadSupportInterface* create(const ConstructionInfo& info); -}; - -#endif //BT_THREAD_SUPPORT_INTERFACE_H diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp deleted file mode 100644 index a03f6dc570..0000000000 --- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp +++ /dev/null @@ -1,353 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#if BT_THREADSAFE && !defined(_WIN32) - -#include "LinearMath/btScalar.h" -#include "LinearMath/btAlignedObjectArray.h" -#include "LinearMath/btThreads.h" -#include "LinearMath/btMinMax.h" -#include "btThreadSupportInterface.h" - -#include <stdio.h> -#include <errno.h> -#include <unistd.h> - -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html -#endif //_XOPEN_SOURCE -#include <pthread.h> -#include <semaphore.h> -#include <unistd.h> //for sysconf - -/// -/// getNumHardwareThreads() -/// -/// -/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine -/// -#if __cplusplus >= 201103L - -#include <thread> - -int btGetNumHardwareThreads() -{ - return btMax(1u, btMin(BT_MAX_THREAD_COUNT, std::thread::hardware_concurrency())); -} - -#else - -int btGetNumHardwareThreads() -{ - return btMax(1, btMin<int>(BT_MAX_THREAD_COUNT, sysconf(_SC_NPROCESSORS_ONLN))); -} - -#endif - -// btThreadSupportPosix helps to initialize/shutdown libspe2, start/stop SPU tasks and communication -class btThreadSupportPosix : public btThreadSupportInterface -{ -public: - struct btThreadStatus - { - int m_taskId; - int m_commandId; - int m_status; - - ThreadFunc m_userThreadFunc; - void* m_userPtr; //for taskDesc etc - - pthread_t thread; - //each tread will wait until this signal to start its work - sem_t* startSemaphore; - btCriticalSection* m_cs; - // this is a copy of m_mainSemaphore, - //each tread will signal once it is finished with its work - sem_t* m_mainSemaphore; - unsigned long threadUsed; - }; - -private: - typedef unsigned long long UINT64; - - btAlignedObjectArray<btThreadStatus> m_activeThreadStatus; - // m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work - sem_t* m_mainSemaphore; - int m_numThreads; - UINT64 m_startedThreadsMask; - void startThreads(const ConstructionInfo& threadInfo); - void stopThreads(); - int waitForResponse(); - btCriticalSection* m_cs; -public: - btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo); - virtual ~btThreadSupportPosix(); - - virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; } - // TODO: return the number of logical processors sharing the first L3 cache - virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; } - // TODO: detect if CPU has hyperthreading enabled - virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; } - - virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE; - virtual void waitForAllTasks() BT_OVERRIDE; - - virtual btCriticalSection* createCriticalSection() BT_OVERRIDE; - virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE; -}; - -#define checkPThreadFunction(returnValue) \ - if (0 != returnValue) \ - { \ - printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \ - } - -// The number of threads should be equal to the number of available cores -// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor. - -btThreadSupportPosix::btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo) -{ - m_cs = createCriticalSection(); - startThreads(threadConstructionInfo); -} - -// cleanup/shutdown Libspe2 -btThreadSupportPosix::~btThreadSupportPosix() -{ - stopThreads(); - deleteCriticalSection(m_cs); - m_cs=0; -} - -#if (defined(__APPLE__)) -#define NAMED_SEMAPHORES -#endif - -static sem_t* createSem(const char* baseName) -{ - static int semCount = 0; -#ifdef NAMED_SEMAPHORES - /// Named semaphore begin - char name[32]; - snprintf(name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++); - sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0); - - if (tempSem != reinterpret_cast<sem_t*>(SEM_FAILED)) - { - // printf("Created \"%s\" Semaphore %p\n", name, tempSem); - } - else - { - //printf("Error creating Semaphore %d\n", errno); - exit(-1); - } - /// Named semaphore end -#else - sem_t* tempSem = new sem_t; - checkPThreadFunction(sem_init(tempSem, 0, 0)); -#endif - return tempSem; -} - -static void destroySem(sem_t* semaphore) -{ -#ifdef NAMED_SEMAPHORES - checkPThreadFunction(sem_close(semaphore)); -#else - checkPThreadFunction(sem_destroy(semaphore)); - delete semaphore; -#endif -} - -static void* threadFunction(void* argument) -{ - btThreadSupportPosix::btThreadStatus* status = (btThreadSupportPosix::btThreadStatus*)argument; - - while (1) - { - checkPThreadFunction(sem_wait(status->startSemaphore)); - void* userPtr = status->m_userPtr; - - if (userPtr) - { - btAssert(status->m_status); - status->m_userThreadFunc(userPtr); - status->m_cs->lock(); - status->m_status = 2; - status->m_cs->unlock(); - checkPThreadFunction(sem_post(status->m_mainSemaphore)); - status->threadUsed++; - } - else - { - //exit Thread - status->m_cs->lock(); - status->m_status = 3; - status->m_cs->unlock(); - checkPThreadFunction(sem_post(status->m_mainSemaphore)); - break; - } - } - - return 0; -} - -///send messages to SPUs -void btThreadSupportPosix::runTask(int threadIndex, void* userData) -{ - ///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished - btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex]; - btAssert(threadIndex >= 0); - btAssert(threadIndex < m_activeThreadStatus.size()); - threadStatus.m_cs = m_cs; - threadStatus.m_commandId = 1; - threadStatus.m_status = 1; - threadStatus.m_userPtr = userData; - m_startedThreadsMask |= UINT64(1) << threadIndex; - - // fire event to start new task - checkPThreadFunction(sem_post(threadStatus.startSemaphore)); -} - -///check for messages from SPUs -int btThreadSupportPosix::waitForResponse() -{ - ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response - ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback' - - btAssert(m_activeThreadStatus.size()); - - // wait for any of the threads to finish - checkPThreadFunction(sem_wait(m_mainSemaphore)); - // get at least one thread which has finished - size_t last = -1; - - for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t) - { - m_cs->lock(); - bool hasFinished = (2 == m_activeThreadStatus[t].m_status); - m_cs->unlock(); - if (hasFinished) - { - last = t; - break; - } - } - - btThreadStatus& threadStatus = m_activeThreadStatus[last]; - - btAssert(threadStatus.m_status > 1); - threadStatus.m_status = 0; - - // need to find an active spu - btAssert(last >= 0); - m_startedThreadsMask &= ~(UINT64(1) << last); - - return last; -} - -void btThreadSupportPosix::waitForAllTasks() -{ - while (m_startedThreadsMask) - { - waitForResponse(); - } -} - -void btThreadSupportPosix::startThreads(const ConstructionInfo& threadConstructionInfo) -{ - m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already - m_activeThreadStatus.resize(m_numThreads); - m_startedThreadsMask = 0; - - m_mainSemaphore = createSem("main"); - //checkPThreadFunction(sem_wait(mainSemaphore)); - - for (int i = 0; i < m_numThreads; i++) - { - btThreadStatus& threadStatus = m_activeThreadStatus[i]; - threadStatus.startSemaphore = createSem("threadLocal"); - threadStatus.m_userPtr = 0; - threadStatus.m_cs = m_cs; - threadStatus.m_taskId = i; - threadStatus.m_commandId = 0; - threadStatus.m_status = 0; - threadStatus.m_mainSemaphore = m_mainSemaphore; - threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; - threadStatus.threadUsed = 0; - checkPThreadFunction(pthread_create(&threadStatus.thread, NULL, &threadFunction, (void*)&threadStatus)); - - } -} - -///tell the task scheduler we are done with the SPU tasks -void btThreadSupportPosix::stopThreads() -{ - for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t) - { - btThreadStatus& threadStatus = m_activeThreadStatus[t]; - - threadStatus.m_userPtr = 0; - checkPThreadFunction(sem_post(threadStatus.startSemaphore)); - checkPThreadFunction(sem_wait(m_mainSemaphore)); - - checkPThreadFunction(pthread_join(threadStatus.thread, 0)); - destroySem(threadStatus.startSemaphore); - } - destroySem(m_mainSemaphore); - m_activeThreadStatus.clear(); -} - -class btCriticalSectionPosix : public btCriticalSection -{ - pthread_mutex_t m_mutex; - -public: - btCriticalSectionPosix() - { - pthread_mutex_init(&m_mutex, NULL); - } - virtual ~btCriticalSectionPosix() - { - pthread_mutex_destroy(&m_mutex); - } - - virtual void lock() - { - pthread_mutex_lock(&m_mutex); - } - virtual void unlock() - { - pthread_mutex_unlock(&m_mutex); - } -}; - -btCriticalSection* btThreadSupportPosix::createCriticalSection() -{ - return new btCriticalSectionPosix(); -} - -void btThreadSupportPosix::deleteCriticalSection(btCriticalSection* cs) -{ - delete cs; -} - -btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info) -{ - return new btThreadSupportPosix(info); -} - -#endif // BT_THREADSAFE && !defined( _WIN32 ) diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp deleted file mode 100644 index 5862264a67..0000000000 --- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp +++ /dev/null @@ -1,458 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#if defined(_WIN32) && BT_THREADSAFE - -#include "LinearMath/btScalar.h" -#include "LinearMath/btMinMax.h" -#include "LinearMath/btAlignedObjectArray.h" -#include "LinearMath/btThreads.h" -#include "btThreadSupportInterface.h" -#include <windows.h> -#include <stdio.h> - -struct btProcessorInfo -{ - int numLogicalProcessors; - int numCores; - int numNumaNodes; - int numL1Cache; - int numL2Cache; - int numL3Cache; - int numPhysicalPackages; - static const int maxNumTeamMasks = 32; - int numTeamMasks; - UINT64 processorTeamMasks[maxNumTeamMasks]; -}; - -UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId) -{ - UINT64 procMask = UINT64(1) << procId; - for (int i = 0; i < procInfo.numTeamMasks; ++i) - { - if (procMask & procInfo.processorTeamMasks[i]) - { - return procInfo.processorTeamMasks[i]; - } - } - return 0; -} - -int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId) -{ - UINT64 procMask = UINT64(1) << procId; - for (int i = 0; i < procInfo.numTeamMasks; ++i) - { - if (procMask & procInfo.processorTeamMasks[i]) - { - return i; - } - } - return -1; -} - -int countSetBits(ULONG64 bits) -{ - int count = 0; - while (bits) - { - if (bits & 1) - { - count++; - } - bits >>= 1; - } - return count; -} - -typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); - -void getProcessorInformation(btProcessorInfo* procInfo) -{ - memset(procInfo, 0, sizeof(*procInfo)); -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \ - !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) - // Can't dlopen libraries on UWP. - return; -#else - Pfn_GetLogicalProcessorInformation getLogicalProcInfo = - (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); - if (getLogicalProcInfo == NULL) - { - // no info - return; - } - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL; - DWORD bufSize = 0; - while (true) - { - if (getLogicalProcInfo(buf, &bufSize)) - { - break; - } - else - { - if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) - { - if (buf) - { - free(buf); - } - buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize); - } - } - } - - int len = bufSize / sizeof(*buf); - for (int i = 0; i < len; ++i) - { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i; - switch (info->Relationship) - { - case RelationNumaNode: - procInfo->numNumaNodes++; - break; - - case RelationProcessorCore: - procInfo->numCores++; - procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask); - break; - - case RelationCache: - if (info->Cache.Level == 1) - { - procInfo->numL1Cache++; - } - else if (info->Cache.Level == 2) - { - procInfo->numL2Cache++; - } - else if (info->Cache.Level == 3) - { - procInfo->numL3Cache++; - // processors that share L3 cache are considered to be on the same team - // because they can more easily work together on the same data. - // Large performance penalties will occur if 2 or more threads from different - // teams attempt to frequently read and modify the same cache lines. - // - // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into - // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both - // CCXs are operating on the same data, many cycles will be spent keeping the - // two caches coherent. - if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks) - { - procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask; - procInfo->numTeamMasks++; - } - } - break; - - case RelationProcessorPackage: - procInfo->numPhysicalPackages++; - break; - } - } - free(buf); -#endif -} - -///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication -class btThreadSupportWin32 : public btThreadSupportInterface -{ -public: - struct btThreadStatus - { - int m_taskId; - int m_commandId; - int m_status; - - ThreadFunc m_userThreadFunc; - void* m_userPtr; //for taskDesc etc - - void* m_threadHandle; //this one is calling 'Win32ThreadFunc' - - void* m_eventStartHandle; - char m_eventStartHandleName[32]; - - void* m_eventCompleteHandle; - char m_eventCompleteHandleName[32]; - }; - -private: - btAlignedObjectArray<btThreadStatus> m_activeThreadStatus; - btAlignedObjectArray<void*> m_completeHandles; - int m_numThreads; - DWORD_PTR m_startedThreadMask; - btProcessorInfo m_processorInfo; - - void startThreads(const ConstructionInfo& threadInfo); - void stopThreads(); - int waitForResponse(); - -public: - btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo); - virtual ~btThreadSupportWin32(); - - virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; } - virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); } - virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; } - - virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE; - virtual void waitForAllTasks() BT_OVERRIDE; - - virtual btCriticalSection* createCriticalSection() BT_OVERRIDE; - virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE; -}; - -btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo) -{ - startThreads(threadConstructionInfo); -} - -btThreadSupportWin32::~btThreadSupportWin32() -{ - stopThreads(); -} - -DWORD WINAPI win32threadStartFunc(LPVOID lpParam) -{ - btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam; - - while (1) - { - WaitForSingleObject(status->m_eventStartHandle, INFINITE); - void* userPtr = status->m_userPtr; - - if (userPtr) - { - btAssert(status->m_status); - status->m_userThreadFunc(userPtr); - status->m_status = 2; - SetEvent(status->m_eventCompleteHandle); - } - else - { - //exit Thread - status->m_status = 3; - printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle); - SetEvent(status->m_eventCompleteHandle); - break; - } - } - printf("Thread TERMINATED\n"); - return 0; -} - -void btThreadSupportWin32::runTask(int threadIndex, void* userData) -{ - btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex]; - btAssert(threadIndex >= 0); - btAssert(int(threadIndex) < m_activeThreadStatus.size()); - - threadStatus.m_commandId = 1; - threadStatus.m_status = 1; - threadStatus.m_userPtr = userData; - m_startedThreadMask |= DWORD_PTR(1) << threadIndex; - - ///fire event to start new task - SetEvent(threadStatus.m_eventStartHandle); -} - -int btThreadSupportWin32::waitForResponse() -{ - btAssert(m_activeThreadStatus.size()); - - int last = -1; - DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE); - btAssert(res != WAIT_FAILED); - last = res - WAIT_OBJECT_0; - - btThreadStatus& threadStatus = m_activeThreadStatus[last]; - btAssert(threadStatus.m_threadHandle); - btAssert(threadStatus.m_eventCompleteHandle); - - //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE); - btAssert(threadStatus.m_status > 1); - threadStatus.m_status = 0; - - ///need to find an active spu - btAssert(last >= 0); - m_startedThreadMask &= ~(DWORD_PTR(1) << last); - - return last; -} - -void btThreadSupportWin32::waitForAllTasks() -{ - while (m_startedThreadMask) - { - waitForResponse(); - } -} - -void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo) -{ - static int uniqueId = 0; - uniqueId++; - btProcessorInfo& procInfo = m_processorInfo; - getProcessorInformation(&procInfo); - DWORD_PTR dwProcessAffinityMask = 0; - DWORD_PTR dwSystemAffinityMask = 0; - if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask)) - { - dwProcessAffinityMask = 0; - } - ///The number of threads should be equal to the number of available cores - 1 - m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists) - - m_activeThreadStatus.resize(m_numThreads); - m_completeHandles.resize(m_numThreads); - m_startedThreadMask = 0; - - // set main thread affinity - if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0)) - { - SetThreadAffinityMask(GetCurrentThread(), mask); - SetThreadIdealProcessor(GetCurrentThread(), 0); - } - - for (int i = 0; i < m_numThreads; i++) - { - printf("starting thread %d\n", i); - - btThreadStatus& threadStatus = m_activeThreadStatus[i]; - - LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL; - SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize; - LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc; - LPVOID lpParameter = &threadStatus; - DWORD dwCreationFlags = 0; - LPDWORD lpThreadId = 0; - - threadStatus.m_userPtr = 0; - - sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i); - threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName); - - sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i); - threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName); - - m_completeHandles[i] = threadStatus.m_eventCompleteHandle; - - HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId); - //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST ); - // highest priority -- can cause erratic performance when numThreads > numCores - // we don't want worker threads to be higher priority than the main thread or the main thread could get - // totally shut out and unable to tell the workers to stop - //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL ); - - { - int processorId = i + 1; // leave processor 0 for main thread - DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId); - if (teamMask) - { - // bind each thread to only execute on processors of it's assigned team - // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team) - // - for multi-socket Intel this will keep threads from migrating from one socket to another - // - for AMD Ryzen this will keep threads from migrating from one CCX to another - DWORD_PTR mask = teamMask & dwProcessAffinityMask; - if (mask) - { - SetThreadAffinityMask(handle, mask); - } - } - SetThreadIdealProcessor(handle, processorId); - } - - threadStatus.m_taskId = i; - threadStatus.m_commandId = 0; - threadStatus.m_status = 0; - threadStatus.m_threadHandle = handle; - threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; - - printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle); - } -} - -///tell the task scheduler we are done with the SPU tasks -void btThreadSupportWin32::stopThreads() -{ - for (int i = 0; i < m_activeThreadStatus.size(); i++) - { - btThreadStatus& threadStatus = m_activeThreadStatus[i]; - if (threadStatus.m_status > 0) - { - WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE); - } - - threadStatus.m_userPtr = NULL; - SetEvent(threadStatus.m_eventStartHandle); - WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE); - - CloseHandle(threadStatus.m_eventCompleteHandle); - CloseHandle(threadStatus.m_eventStartHandle); - CloseHandle(threadStatus.m_threadHandle); - } - - m_activeThreadStatus.clear(); - m_completeHandles.clear(); -} - -class btWin32CriticalSection : public btCriticalSection -{ -private: - CRITICAL_SECTION mCriticalSection; - -public: - btWin32CriticalSection() - { - InitializeCriticalSection(&mCriticalSection); - } - - ~btWin32CriticalSection() - { - DeleteCriticalSection(&mCriticalSection); - } - - void lock() - { - EnterCriticalSection(&mCriticalSection); - } - - void unlock() - { - LeaveCriticalSection(&mCriticalSection); - } -}; - -btCriticalSection* btThreadSupportWin32::createCriticalSection() -{ - unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16); - btWin32CriticalSection* cs = new (mem) btWin32CriticalSection(); - return cs; -} - -void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection) -{ - criticalSection->~btCriticalSection(); - btAlignedFree(criticalSection); -} - -btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info) -{ - return new btThreadSupportWin32(info); -} - -#endif //defined(_WIN32) && BT_THREADSAFE |